diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2022-09-07 19:04:21 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2022-09-07 19:04:21 +0900 |
commit | c690d52bdd137ed6a17353aa7af35e8141ece77b (patch) | |
tree | dbb7dd99133132dfbffcb8c9e9af4f1ffc2f4808 /compiler | |
parent | 3ad689f0803519e343c36d5700646e86059df961 (diff) | |
download | nnfw-c690d52bdd137ed6a17353aa7af35e8141ece77b.tar.gz nnfw-c690d52bdd137ed6a17353aa7af35e8141ece77b.tar.bz2 nnfw-c690d52bdd137ed6a17353aa7af35e8141ece77b.zip |
Imported Upstream version 1.21.0
upstream/1.21.0 tizen_7.0_m2_release accepted/tizen/unified/20220912.170817 accepted/tizen/unified/20220912.164738 accepted/tizen/7.0/unified/hotfix/20221116.105341 accepted/tizen/7.0/unified/20221110.060236 tizen_7.0_hotfix tizen_7.0 accepted/tizen_7.0_unified_hotfix accepted/tizen_7.0_unified
Diffstat (limited to 'compiler')
1301 files changed, 60076 insertions, 3238 deletions
diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h index 1703e421e..43f99dc5e 100644 --- a/compiler/arser/include/arser/arser.h +++ b/compiler/arser/include/arser/arser.h @@ -303,7 +303,7 @@ private: std::string _long_name; std::string _short_name; std::vector<std::string> _names; - std::string _type; + std::string _type = "string"; std::string _help_message; std::function<void(void)> _func; uint32_t _nargs{1}; @@ -540,16 +540,20 @@ public: /* ** print usage */ + auto print_usage_arg = [&](const arser::Argument &arg) { + stream << " "; + std::string arg_name = arser::internal::remove_dash(arg._long_name); + std::for_each(arg_name.begin(), arg_name.end(), + [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); }); + }; stream << "Usage: ./" << parser._program_name << " "; // required optional argument for (const auto &arg : parser._optional_arg_vec) { if (!arg._is_required) continue; - stream << arg._short_name << " "; - std::string arg_name = arser::internal::remove_dash(arg._long_name); - std::for_each(arg_name.begin(), arg_name.end(), - [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); }); + stream << arg._short_name; + print_usage_arg(arg); stream << " "; } // rest of the optional argument @@ -560,10 +564,7 @@ public: stream << "[" << arg._short_name; if (arg._nargs) { - stream << " "; - std::string arg_name = arser::internal::remove_dash(arg._long_name); - std::for_each(arg_name.begin(), arg_name.end(), - [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); }); + print_usage_arg(arg); } stream << "]" << " "; @@ -591,39 +592,28 @@ public: } const size_t message_width = 60; - // positional argument - if (!parser._positional_arg_vec.empty()) - { - stream << "[Positional argument]" << std::endl; - for (const auto &arg : parser._positional_arg_vec) + auto print_help_args = [&](const std::list<Argument> &args, const std::string &title) { + if (!args.empty()) { - 
stream.width(length_of_longest_arg); - stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t"; - for (size_t i = 0; i < arg._help_message.length(); i += message_width) + stream << title << std::endl; + for (const auto &arg : args) { - if (i) - stream << std::string(length_of_longest_arg, ' ') << "\t"; - stream << arg._help_message.substr(i, message_width) << std::endl; + stream.width(length_of_longest_arg); + stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t"; + for (size_t i = 0; i < arg._help_message.length(); i += message_width) + { + if (i) + stream << std::string(length_of_longest_arg, ' ') << "\t"; + stream << arg._help_message.substr(i, message_width) << std::endl; + } } + std::cout << std::endl; } - std::cout << std::endl; - } + }; + // positional argument + print_help_args(parser._positional_arg_vec, "[Positional argument]"); // optional argument - if (!parser._optional_arg_vec.empty()) - { - stream << "[Optional argument]" << std::endl; - for (const auto &arg : parser._optional_arg_vec) - { - stream.width(length_of_longest_arg); - stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t"; - for (size_t i = 0; i < arg._help_message.length(); i += message_width) - { - if (i) - stream << std::string(length_of_longest_arg, ' ') << "\t"; - stream << arg._help_message.substr(i, message_width) << std::endl; - } - } - } + print_help_args(parser._optional_arg_vec, "[Optional argument]"); return stream; } @@ -737,6 +727,29 @@ template <typename T> T Arser::get(const std::string &arg_name) return get_impl(arg_name, static_cast<T *>(nullptr)); } +class Helper +{ +public: + static void add_version(Arser &arser, const std::function<void(void)> &func) + { + arser.add_argument("--version") + .nargs(0) + .required(false) + .default_value(false) + .help("Show version information and exit") + .exit_with(func); + } + + static void add_verbose(Arser &arser) + { + arser.add_argument("-V", 
"--verbose") + .nargs(0) + .required(false) + .default_value(false) + .help("output additional information to stdout or stderr"); + } +}; + } // namespace arser #endif // __ARSER_H__ diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt index 4d86f8097..d5a62301c 100644 --- a/compiler/circle-eval-diff/CMakeLists.txt +++ b/compiler/circle-eval-diff/CMakeLists.txt @@ -6,6 +6,7 @@ list(REMOVE_ITEM SOURCES ${TESTS}) add_executable(circle-eval-diff ${DRIVER} ${SOURCES}) target_include_directories(circle-eval-diff PRIVATE include) +target_include_directories(circle-eval-diff PRIVATE src) target_link_libraries(circle-eval-diff arser) target_link_libraries(circle-eval-diff safemain) @@ -17,6 +18,8 @@ target_link_libraries(circle-eval-diff luci_interpreter) target_link_libraries(circle-eval-diff dio_hdf5) target_link_libraries(circle-eval-diff vconone) +install(TARGETS circle-eval-diff DESTINATION bin) + if(NOT ENABLE_TEST) return() endif(NOT ENABLE_TEST) @@ -25,10 +28,15 @@ endif(NOT ENABLE_TEST) # Instead, we use TEST_SOURCES to specify sources uesd for tests. 
set(TEST_SOURCES "src/MetricPrinter.cpp" - "src/Tensor.cpp") + "src/Tensor.cpp" + "src/InputDataLoader.cpp") nnas_find_package(GTest REQUIRED) GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES}) +target_include_directories(circle_eval_diff_test PRIVATE include) target_include_directories(circle_eval_diff_test PRIVATE src) target_link_libraries(circle_eval_diff_test luci_testhelper) target_link_libraries(circle_eval_diff_test nncc_coverage) +target_link_libraries(circle_eval_diff_test dio_hdf5) +target_link_libraries(circle_eval_diff_test loco) +target_link_libraries(circle_eval_diff_test luci_lang) diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp index f4a12a403..7e63ec88c 100644 --- a/compiler/circle-eval-diff/driver/Driver.cpp +++ b/compiler/circle-eval-diff/driver/Driver.cpp @@ -30,19 +30,15 @@ std::string to_lower_case(std::string s) return s; } -Metric to_metric(const std::string &str) -{ - if (to_lower_case(str).compare("mae") == 0) - return Metric::MAE; - - throw std::runtime_error("Unsupported metric."); -} - InputFormat to_input_format(const std::string &str) { - if (to_lower_case(str).compare("h5") == 0) + auto small_str = to_lower_case(str); + if (small_str.compare("h5") == 0) return InputFormat::H5; + if (small_str.compare("directory") == 0 || small_str.compare("dir") == 0) + return InputFormat::DIR; + throw std::runtime_error("Unsupported input format."); } @@ -58,50 +54,50 @@ int entry(const int argc, char **argv) { arser::Arser arser("Compare inference results of two circle models"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); + arser::Helper::add_version(arser, print_version); - arser.add_argument("--first_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("First input model filepath"); + 
arser.add_argument("--first_model").required(true).help("First input model filepath"); - arser.add_argument("--second_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("Second input model filepath"); + arser.add_argument("--second_model").required(true).help("Second input model filepath"); arser.add_argument("--first_input_data") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Input data filepath for the first model. If not given, circle-eval-diff will run with " "randomly generated data"); arser.add_argument("--second_input_data") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Input data filepath for the second model. If not given, circle-eval-diff will run with " "randomly generated data"); - arser.add_argument("--metric") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .default_value("MAE") - .help("Metric for comparison (default: MAE)"); + arser.add_argument("--dump_output_with_prefix") + .help("Dump output to files. <prefix> should be given as an argument. 
" + "Outputs are saved in <prefix>.<data_index>.first.output<output_index> and " + "<prefix>.<data_index>.second.output<output_index>."); + + arser.add_argument("--print_mae").nargs(0).default_value(false).help("Print Mean Absolute Error"); + + arser.add_argument("--print_mape") + .nargs(0) + .default_value(false) + .help("Print Mean Absolute PercentageError"); + + arser.add_argument("--print_mpeir") + .nargs(0) + .default_value(false) + .help("Print Mean Peak Error to Interval Ratio"); + + arser.add_argument("--print_top1_match") + .nargs(0) + .default_value(false) + .help("Print Mean Top-1 Match Ratio"); + + arser.add_argument("--print_top5_match") + .nargs(0) + .default_value(false) + .help("Print Mean Top-5 Match Ratio"); + + arser.add_argument("--print_mse").nargs(0).default_value(false).help("Print Mean Squared Error"); arser.add_argument("--input_data_format") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("h5") .help("Input data format. h5/hdf5 (default) or directory"); @@ -124,6 +120,7 @@ int entry(const int argc, char **argv) std::string second_input_data_path; std::string metric; std::string input_data_format; + std::string output_prefix; if (arser["--first_input_data"]) first_input_data_path = arser.get<std::string>("--first_input_data"); @@ -135,22 +132,54 @@ int entry(const int argc, char **argv) throw std::runtime_error("Input data path should be given for both first_model and " "second_model, or neither must be given."); - metric = arser.get<std::string>("--metric"); + if (arser["--dump_output_with_prefix"]) + output_prefix = arser.get<std::string>("--dump_output_with_prefix"); + + // Set Metrics + std::vector<Metric> metrics; + if (arser["--print_mae"] and arser.get<bool>("--print_mae")) + { + metrics.emplace_back(Metric::MAE); + } + if (arser["--print_mape"] and arser.get<bool>("--print_mape")) + { + metrics.emplace_back(Metric::MAPE); + } + if (arser["--print_mpeir"] and arser.get<bool>("--print_mpeir")) + { + 
metrics.emplace_back(Metric::MPEIR); + } + if (arser["--print_top1_match"] and arser.get<bool>("--print_top1_match")) + { + metrics.emplace_back(Metric::MTOP1); + } + if (arser["--print_top5_match"] and arser.get<bool>("--print_top5_match")) + { + metrics.emplace_back(Metric::MTOP5); + } + if (arser["--print_mse"] and arser.get<bool>("--print_mse")) + { + metrics.emplace_back(Metric::MSE); + } + input_data_format = arser.get<std::string>("--input_data_format"); auto ctx = std::make_unique<CircleEvalDiff::Context>(); { ctx->first_model_path = first_model_path; ctx->second_model_path = second_model_path; - ctx->metric = to_metric(metric); + ctx->first_input_data_path = first_input_data_path; + ctx->second_input_data_path = second_input_data_path; + ctx->metric = metrics; ctx->input_format = to_input_format(input_data_format); + ctx->output_prefix = output_prefix; } CircleEvalDiff ced(std::move(ctx)); ced.init(); - ced.evalDiff(first_input_data_path, second_input_data_path); + ced.evalDiff(); return EXIT_SUCCESS; } diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h index bf6aff46d..7894480ac 100644 --- a/compiler/circle-eval-diff/include/CircleEvalDiff.h +++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h @@ -20,8 +20,12 @@ #include <luci/IR/Module.h> #include <luci_interpreter/Interpreter.h> +#include "InputDataLoader.h" +#include "MetricPrinter.h" + #include <string> #include <memory> +#include <vector> namespace circle_eval_diff { @@ -32,14 +36,12 @@ class ModuleEvalDiff; enum class Metric { Undefined, // For debugging - MAE, -}; - -enum class InputFormat -{ - Undefined, // For debugging - H5, - // TODO Implement Random, Directory + MAE, // Mean Absolute Error + MAPE, // Mean Percentage Absolute Error + MPEIR, // Mean Peak Error to Interval Ratio + MTOP1, // Mean Top-1 Match Ratio + MTOP5, // Mean Top-5 Match Ratio + MSE, // Mean Squared Error }; class CircleEvalDiff final @@ -49,8 +51,11 @@ 
public: { std::string first_model_path; std::string second_model_path; - Metric metric = Metric::Undefined; + std::string first_input_data_path; + std::string second_input_data_path; + std::vector<Metric> metric; InputFormat input_format = InputFormat::Undefined; + std::string output_prefix; }; public: @@ -61,12 +66,13 @@ public: void init(); // Evaluate two circle models for the given input data and compare the results - void evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const; + void evalDiff(void) const; private: std::unique_ptr<Context> _ctx; - std::unique_ptr<ModuleEvalDiff> _runner; + std::unique_ptr<luci::Module> _first_module; + std::unique_ptr<luci::Module> _second_module; + std::vector<std::unique_ptr<MetricPrinter>> _metrics; }; } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp index c39a11371..43e026bf6 100644 --- a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp +++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp @@ -15,8 +15,9 @@ */ #include "CircleEvalDiff.h" -#include "ModuleEvalDiff.h" +#include "InputDataLoader.h" #include "MetricPrinter.h" +#include "Tensor.h" #include <foder/FileLoader.h> #include <luci/Importer.h> @@ -26,6 +27,25 @@ namespace { +bool same_shape(const luci::CircleNode *a, const luci::CircleNode *b) +{ + if (a->rank() != b->rank()) + return false; + + for (uint32_t i = 0; i < a->rank(); i++) + { + if (not(a->dim(i) == b->dim(i))) + return false; + } + + return true; +} + +bool same_dtype(const luci::CircleNode *a, const luci::CircleNode *b) +{ + return a->dtype() == b->dtype(); +} + std::unique_ptr<luci::Module> import(const std::string &model_path) { // Load model from the file @@ -40,7 +60,12 @@ std::unique_ptr<luci::Module> import(const std::string &model_path) throw std::runtime_error("Failed to verify circle '" + model_path + "'"); } - auto module = 
luci::Importer().importModule(circle::GetModel(model_data.data())); + auto circle_model = circle::GetModel(model_data.data()); + + if (not circle_model) + throw std::runtime_error("Failed to load '" + model_path + "'"); + + auto module = luci::Importer().importModule(circle_model); if (not module) throw std::runtime_error("Failed to load '" + model_path + "'"); @@ -48,50 +73,192 @@ std::unique_ptr<luci::Module> import(const std::string &model_path) return module; } +const std::vector<loco::Node *> inputs_of(const luci::Module *module) +{ + return loco::input_nodes(module->graph()); +} + +const std::vector<loco::Node *> outputs_of(const luci::Module *module) +{ + return loco::output_nodes(module->graph()); +} + +void writeDataToFile(const std::string &filename, const char *data, size_t data_size) +{ + std::ofstream fs(filename, std::ofstream::binary); + if (fs.fail()) + throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); + if (fs.write(data, data_size).fail()) + { + throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n"); + } +} + +void checkOutputs(const luci::Module *first, const luci::Module *second) +{ + const auto first_output = outputs_of(first); + const auto second_output = outputs_of(second); + + if (first_output.size() != second_output.size()) + throw std::runtime_error("Models have different output counts"); + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]); + const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]); + + if (not same_shape(first_node, second_node)) + throw std::runtime_error("Output shape mismatch (" + first_node->name() + ", " + + second_node->name() + ")"); + + if (not same_dtype(first_node, second_node)) + throw std::runtime_error("Output dtype mismatch (" + first_node->name() + ", " + + second_node->name() + ")"); + } +} + } // namespace namespace circle_eval_diff { 
-CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) - : _ctx(std::move(ctx)), _runner(nullptr) +std::vector<std::shared_ptr<Tensor>> interpret(const luci::Module *module, + const InputDataLoader::Data &data) +{ + auto interpreter = std::make_unique<luci_interpreter::Interpreter>(module); + + auto input_nodes = ::inputs_of(module); + auto output_nodes = ::outputs_of(module); + + for (uint32_t input_idx = 0; input_idx < data.size(); input_idx++) + { + auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]); + assert(input_node->index() == input_idx); + + auto input_data = data.at(input_idx); + interpreter->writeInputTensor(input_node, input_data.buffer(), input_data.byte_size()); + } + + interpreter->interpret(); + + std::vector<std::shared_ptr<Tensor>> outputs; + for (uint32_t output_idx = 0; output_idx < output_nodes.size(); output_idx++) + { + auto output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]); + assert(output_node->index() == output_idx); + + auto tensor = createEmptyTensor(output_node); + interpreter->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size()); + outputs.emplace_back(tensor); + } + + return outputs; +} + +CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) : _ctx(std::move(ctx)) { + // DO NOTHING } CircleEvalDiff::~CircleEvalDiff() = default; void CircleEvalDiff::init() { + _first_module = import(_ctx->first_model_path); + _second_module = import(_ctx->second_model_path); + + // Check modules have the same output signature (dtype/shape) + // Exception will be thrown if they have different signature + checkOutputs(_first_module.get(), _second_module.get()); + // Set metric std::unique_ptr<MetricPrinter> metric; - switch (_ctx->metric) + for (auto metric : _ctx->metric) { - case Metric::MAE: - metric = std::make_unique<MAEPrinter>(); - break; - default: - throw std::runtime_error("Unsupported metric."); + switch (metric) + { + case Metric::MAE: + { + 
_metrics.emplace_back(std::make_unique<MAEPrinter>()); + break; + } + case Metric::MAPE: + { + _metrics.emplace_back(std::make_unique<MAPEPrinter>()); + break; + } + case Metric::MPEIR: + { + _metrics.emplace_back(std::make_unique<MPEIRPrinter>()); + break; + } + case Metric::MTOP1: + { + _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(1)); + break; + } + case Metric::MTOP5: + { + _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(5)); + break; + } + case Metric::MSE: + { + _metrics.emplace_back(std::make_unique<MSEPrinter>()); + break; + } + default: + throw std::runtime_error("Unsupported metric."); + } + _metrics.back()->init(_first_module.get(), _second_module.get()); } +} - auto first_module = import(_ctx->first_model_path); - auto second_module = import(_ctx->second_model_path); +void CircleEvalDiff::evalDiff(void) const +{ + auto first_input_loader = circle_eval_diff::makeDataLoader( + _ctx->first_input_data_path, _ctx->input_format, ::inputs_of(_first_module.get())); + auto second_input_loader = circle_eval_diff::makeDataLoader( + _ctx->second_input_data_path, _ctx->input_format, ::inputs_of(_second_module.get())); - // Set runner - switch (_ctx->input_format) + for (uint32_t data_idx = 0; data_idx < first_input_loader->size(); data_idx++) { - case InputFormat::H5: - _runner = std::make_unique<H5InputEvalDiff>(std::move(first_module), std::move(second_module), - std::move(metric)); - break; - default: - throw std::runtime_error("Unsupported input format."); + std::cout << "Evaluating " << data_idx << "'th data" << std::endl; + + auto first_data = first_input_loader->get(data_idx); + auto second_data = second_input_loader->get(data_idx); + + auto first_output = interpret(_first_module.get(), first_data); + auto second_output = interpret(_second_module.get(), second_data); + + for (auto &metric : _metrics) + { + metric->accumulate(first_output, second_output); + } + + if (_ctx.get()->output_prefix.empty()) + continue; + + for (uint32_t i = 0; i 
< first_output.size(); i++) + { + auto out = first_output[i]; + writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + ".first.output" + + std::to_string(i), + (char *)(out->buffer()), out->byte_size()); + } + for (uint32_t i = 0; i < second_output.size(); i++) + { + auto out = second_output[i]; + writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + + ".second.output" + std::to_string(i), + (char *)(out->buffer()), out->byte_size()); + } } -} -void CircleEvalDiff::evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const -{ - _runner->evalDiff(first_input_data_path, second_input_data_path); + for (auto &metric : _metrics) + { + std::cout << metric.get() << std::endl; + } } } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/InputDataLoader.cpp b/compiler/circle-eval-diff/src/InputDataLoader.cpp new file mode 100644 index 000000000..99276f32a --- /dev/null +++ b/compiler/circle-eval-diff/src/InputDataLoader.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "InputDataLoader.h" + +#include <dio_hdf5/HDF5Importer.h> +#include <loco/IR/Graph.h> +#include <luci/IR/CircleNodes.h> + +#include <cstring> +#include <dirent.h> +#include <fstream> +#include <vector> + +using DataType = loco::DataType; +using Shape = std::vector<loco::Dimension>; + +namespace circle_eval_diff +{ + +// Check the type and the shape of CircleInput +void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape) +{ + // Type check + if (dtype != input_node->dtype()) + throw std::runtime_error("Wrong input type."); + + if (shape.size() != input_node->rank()) + throw std::runtime_error("Input rank mismatch."); + + for (uint32_t i = 0; i < shape.size(); i++) + { + if (not(shape.at(i) == input_node->dim(i))) + throw std::runtime_error("Input shape mismatch."); + } +} + +std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes) +{ + std::vector<size_t> vec; + + for (const auto node : nodes) + { + const auto input_node = loco::must_cast<const luci::CircleInput *>(node); + size_t element_size = 1; + + for (uint32_t index = 0; index < input_node->rank(); index++) + { + element_size *= input_node->dim(index).value(); + } + + vec.push_back(element_size); + } + + return vec; +} + +size_t getTotalByteSizeOf(const std::vector<loco::Node *> &nodes) +{ + size_t total_byte_size = 0; + + for (const auto node : nodes) + { + const auto input_node = loco::must_cast<const luci::CircleInput *>(node); + size_t byte_size = loco::size(input_node->dtype()); + + for (uint32_t index = 0; index < input_node->rank(); index++) + { + byte_size *= input_node->dim(index).value(); + } + + total_byte_size += byte_size; + } + + return total_byte_size; +} + +} // namespace circle_eval_diff + +namespace circle_eval_diff +{ + +HDF5Loader::HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes) + : _input_nodes{input_nodes} +{ + try + { + using HDF5Importer = dio::hdf5::HDF5Importer; + + 
_hdf5 = std::make_unique<HDF5Importer>(file_path); + _hdf5->importGroup("value"); + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + throw std::runtime_error("HDF5 error occurred."); + } +} + +uint32_t HDF5Loader::size(void) const { return _hdf5->numData(); } + +InputDataLoader::Data HDF5Loader::get(uint32_t data_idx) const +{ + Data data; + data.resize(_input_nodes.size()); + + for (uint32_t input_idx = 0; input_idx < _input_nodes.size(); input_idx++) + { + auto input_node = loco::must_cast<luci::CircleInput *>(_input_nodes.at(input_idx)); + assert(input_node->index() == input_idx); + + data.at(input_idx) = *createEmptyTensor(input_node).get(); + + auto input_buffer = data.at(input_idx).buffer(); + try + { + if (_hdf5->isRawData()) + { + _hdf5->readTensor(data_idx, input_idx, input_buffer); + } + else + { + DataType dtype; + Shape shape; + _hdf5->readTensor(data_idx, input_idx, &dtype, &shape, input_buffer); + + // Check the type and the shape of the input data is valid + verifyTypeShape(input_node, dtype, shape); + } + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + throw std::runtime_error("HDF5 error occurred."); + } + } + + return data; +} + +DirectoryLoader::DirectoryLoader(const std::string &dir_path, + const std::vector<loco::Node *> &input_nodes) + : _input_nodes{input_nodes} +{ + DIR *dir = opendir(dir_path.c_str()); + if (not dir) + { + throw std::runtime_error("Cannot open directory \"" + dir_path + "\"."); + } + + struct dirent *entry = nullptr; + const auto input_total_bytes = getTotalByteSizeOf(input_nodes); + while (entry = readdir(dir)) + { + // Skip if the entry is not a regular file + if (entry->d_type != DT_REG) + continue; + + _data_paths.push_back(dir_path + "/" + entry->d_name); + } + + closedir(dir); +} + +uint32_t DirectoryLoader::size(void) const { return _data_paths.size(); } + +InputDataLoader::Data DirectoryLoader::get(uint32_t data_idx) const +{ + // Read raw data + const auto 
input_total_bytes = getTotalByteSizeOf(_input_nodes); + std::vector<char> input_data(input_total_bytes); + const auto raw_data_path = _data_paths.at(data_idx); + std::ifstream fs(raw_data_path, std::ifstream::binary); + + if (fs.fail()) + { + throw std::runtime_error("Cannot open file \"" + raw_data_path + "\"."); + } + if (fs.read(input_data.data(), input_total_bytes).fail()) + { + throw std::runtime_error("Failed to read raw data from file \"" + raw_data_path + "\"."); + } + + // Make Tensor from raw data + auto input_data_cur = input_data.data(); + + Data data; + data.resize(_input_nodes.size()); + std::vector<size_t> input_bytes = getEachByteSizeOf(_input_nodes); + for (uint32_t index = 0; index < _input_nodes.size(); index++) + { + const auto input_node = loco::must_cast<const luci::CircleInput *>(_input_nodes.at(index)); + auto &tensor = data.at(index); + tensor = *createEmptyTensor(input_node).get(); + auto buffer = tensor.buffer(); + std::memcpy(buffer, input_data_cur, input_bytes.at(index)); + input_data_cur += input_bytes.at(index); + } + + return data; +} + +std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path, + const InputFormat &format, + const std::vector<loco::Node *> &input_nodes) +{ + switch (format) + { + case InputFormat::H5: + { + return std::make_unique<HDF5Loader>(file_path, input_nodes); + } + case InputFormat::DIR: + { + return std::make_unique<DirectoryLoader>(file_path, input_nodes); + } + default: + throw std::runtime_error{"Unsupported input format."}; + } +} + +} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/InputDataLoader.h b/compiler/circle-eval-diff/src/InputDataLoader.h new file mode 100644 index 000000000..14921b239 --- /dev/null +++ b/compiler/circle-eval-diff/src/InputDataLoader.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__ +#define __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__ + +#include <dio_hdf5/HDF5Importer.h> +#include <loco/IR/Node.h> +#include <luci/IR/CircleNodes.h> + +#include "Tensor.h" + +#include <memory> +#include <string> + +namespace circle_eval_diff +{ + +void verifyTypeShape(const luci::CircleInput *input_node, const loco::DataType &dtype, + const std::vector<loco::Dimension> &shape); + +} // namespace circle_eval_diff + +namespace circle_eval_diff +{ + +enum class InputFormat +{ + Undefined, // For debugging + H5, + DIR, // directory + // TODO Implement Random, Directory +}; + +class InputDataLoader +{ +public: + using Data = std::vector<Tensor>; + +public: + virtual ~InputDataLoader() = default; + +public: + virtual uint32_t size(void) const = 0; + +public: + virtual Data get(uint32_t data_idx) const = 0; +}; + +class HDF5Loader final : public InputDataLoader +{ +public: + HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes); + +public: + uint32_t size(void) const final; + Data get(uint32_t data_idx) const final; + +private: + const std::vector<loco::Node *> _input_nodes; + std::unique_ptr<dio::hdf5::HDF5Importer> _hdf5; +}; + +// This class loads the directory that has raw data binary files. 
+class DirectoryLoader final : public InputDataLoader +{ +public: + DirectoryLoader(const std::string &dir_path, const std::vector<loco::Node *> &input_nodes); + +public: + uint32_t size(void) const final; + Data get(uint32_t data_idx) const final; + +private: + const std::vector<loco::Node *> _input_nodes; + std::vector<std::string> _data_paths; +}; + +std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path, + const InputFormat &format, + const std::vector<loco::Node *> &input_nodes); + +} // namespace circle_eval_diff + +#endif // __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__ diff --git a/compiler/circle-eval-diff/src/InputDataLoader.test.cpp b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp new file mode 100644 index 000000000..cbe78797b --- /dev/null +++ b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include <luci/IR/CircleNodes.h> + +#include "InputDataLoader.h" + +using namespace circle_eval_diff; + +TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest) +{ + luci::CircleInput input; + input.dtype(loco::DataType::FLOAT32); + input.rank(4); + input.dim(0).set(1); + input.dim(1).set(3); + input.dim(2).set(3); + input.dim(3).set(2); + + loco::DataType right_data_type{loco::DataType::FLOAT32}; + std::vector<loco::Dimension> right_shape; + right_shape.emplace_back(1); + right_shape.emplace_back(3); + right_shape.emplace_back(3); + right_shape.emplace_back(2); + + EXPECT_NO_THROW(verifyTypeShape(&input, right_data_type, right_shape)); +} + +TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest_NEG) +{ + luci::CircleInput input; + input.dtype(loco::DataType::FLOAT32); + input.rank(4); + input.dim(0).set(1); + input.dim(1).set(4); + input.dim(2).set(4); + input.dim(3).set(2); + + loco::DataType right_data_type{loco::DataType::FLOAT32}; + loco::DataType wrong_data_type{loco::DataType::FLOAT16}; + std::vector<loco::Dimension> wrong_shape; + wrong_shape.emplace_back(1); + wrong_shape.emplace_back(3); + wrong_shape.emplace_back(3); + wrong_shape.emplace_back(2); + + EXPECT_ANY_THROW(verifyTypeShape(&input, right_data_type, wrong_shape)); + EXPECT_ANY_THROW(verifyTypeShape(&input, wrong_data_type, wrong_shape)); +} diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp index d65eb9b63..ec8408471 100644 --- a/compiler/circle-eval-diff/src/MetricPrinter.cpp +++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp @@ -18,6 +18,7 @@ #include <luci/IR/CircleNode.h> +#include <limits> #include <iostream> #include <cassert> @@ -30,6 +31,16 @@ using Tensor = circle_eval_diff::Tensor; namespace { +uint32_t num_elems(const luci::CircleNode *node) +{ + uint32_t res = 1; + + for (uint32_t i = 0; i < node->rank(); i++) + res *= node->dim(i).value(); + + return res; +} + template <typename T> 
bool same_shape(const T a, const T b) { if (a->rank() != b->rank()) @@ -44,6 +55,8 @@ template <typename T> bool same_shape(const T a, const T b) return true; } +template <typename T> bool same_dtype(const T a, const T b) { return a->dtype() == b->dtype(); } + template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor) { assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS @@ -97,7 +110,6 @@ void MAEPrinter::init(const luci::Module *first, const luci::Module *second) { const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]); const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]); - assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS // Create tensors to store intermediate results _intermediate.emplace_back(); @@ -180,6 +192,471 @@ void MAEPrinter::dump(std::ostream &os) const } } +// TODO Remove duplicate codes with MAEPrinter +void MAPEPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]); + const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]); + + // Create tensors to store intermediate results + _intermediate.emplace_back(); + _intermediate.at(i).dtype(loco::DataType::FLOAT32); + // NOTE Use both first_node and second_node to avoid release build break + _intermediate.at(i).rank(first_node->rank()); + uint32_t num_elems = 1; + for (uint32_t j = 0; j < second_node->rank(); j++) + { + _intermediate.at(i).dim(j) = second_node->dim(j); + num_elems *= second_node->dim(j).value(); + 
} + _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems); + + // Check the buffer is initilized with zero + for (uint32_t j = 0; j < num_elems; j++) + assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +// Accumulate |(a - b) / a| +void MAPEPrinter::accum_mean_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++) + { + const auto a_val = a->at<loco::DataType::FLOAT32>(i); + const auto b_val = b->at<loco::DataType::FLOAT32>(i); + _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) += + std::abs((a_val - b_val) / a_val); + } +} + +// Assumption +// first: the result of fp32 model +// second: the result of fake-quantized model +void MAPEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 and then compute absolute error + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_mean_absolute_error(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MAPEPrinter::dump(std::ostream &os) const +{ + os << "Mean Absolute Percentage Error (MAPE)" << std::endl; + + for (uint32_t output_idx = 0; 
output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto &inter = _intermediate.at(output_idx); + assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS + const auto elem_count = inter.size<loco::DataType::FLOAT32>(); + + // Compute MAPE + float mape = 0.0; + for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++) + mape += inter.at<loco::DataType::FLOAT32>(elem_idx); + + mape = mape / elem_count; + mape = mape / _num_data; + mape *= 100.0; + + os << "MAPE for " << name << " is " << mape << "%" << std::endl; + } +} + +// TODO Remove duplicate codes with MAEPrinter +void MPEIRPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]); + const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]); + + // Create places to store intermediate results + _intermediate.emplace_back(0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +// Accumulate PEIR (Peak Error to Interval Ratio) +// PEIR = max(|a - b|) / (max(a) - min(a)) +// PEIR >= 0 (lower is better) +void MPEIRPrinter::accum_peir(uint32_t output_idx, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + float min = std::numeric_limits<float>::max(); 
+ float max = std::numeric_limits<float>::lowest(); + + for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++) + { + const auto a_val = a->at<loco::DataType::FLOAT32>(i); + min = std::min(a_val, min); + max = std::max(a_val, max); + } + + float interval = max - min; + + // Corner case: All values are the same. We set interval = 1 in this case + if (interval == 0) + interval = 1.0; + + float peak_error = std::numeric_limits<float>::lowest(); + + for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++) + { + const auto a_val = a->at<loco::DataType::FLOAT32>(i); + const auto b_val = b->at<loco::DataType::FLOAT32>(i); + const auto error = std::abs(a_val - b_val); + peak_error = std::max(error, peak_error); + } + + _intermediate.at(output_idx) += peak_error / interval; +} + +// Assumption (when testing the accuracy of quantized model) +// first: the result of fp32 model +// second: the result of fake-quantized model +void MPEIRPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 for ease of computation + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_peir(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MPEIRPrinter::dump(std::ostream &os) const +{ + os << "Mean Peak Error to Interval Ratio (MPEIR)" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto sum_of_peir = _intermediate.at(output_idx); + + // Compute MPEIR + float mpeir = 
sum_of_peir / _num_data; + + os << "MPEIR for " << name << " is " << mpeir << std::endl; + } +} + +// TODO Remove duplicate codes with MAEPrinter +void TopKMatchPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]); + const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]); + + // Create places to store intermediate results + _intermediate.emplace_back(0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + + // If num_elems of an output is less than k, + // the output index is added to the skip list + if (num_elems(first_node) < _k) + { + std::cout << "Top-" << _k << "metric for " << first_node->name() + << " is ignored, because it has elements less than " << _k << std::endl; + _skip_output.emplace_back(i); + } + } +} + +void TopKMatchPrinter::accum_topk_accuracy(uint32_t output_idx, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + // Find Top-k largest elements + // This implementation is a variant of "Method 2 (Use temporary array)" in + // https://www.geeksforgeeks.org/k-largestor-smallest-elements-in-an-array/ + // We sort top-k elements by value and index to ensure that the element with an earlier + // index comes first if multiple elements have the same value. 
+ auto find_topk = [this](const std::shared_ptr<Tensor> &tensor) { + assert(_k <= tensor->size<loco::DataType::FLOAT32>()); // FIX_CALLER_UNLESS + + // first: value, second: index + std::vector<std::pair<float, uint32_t>> topk; + topk.resize(_k); + + // Initialize + for (uint32_t i = 0; i < _k; i++) + { + topk[i] = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i); + } + + // Input pair: (value, index) + // Return true if a has smaller value than b. If a and b have the same value, + // return true if a has larger index. + auto compare = [](const std::pair<float, uint32_t> &a, const std::pair<float, uint32_t> &b) { + if (a.first == b.first) + return a.second > b.second; + + return a.first < b.first; + }; + + for (uint32_t i = _k; i < tensor->size<loco::DataType::FLOAT32>(); i++) + { + auto val = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i); + + auto min = std::min_element(topk.begin(), topk.end(), compare); + if (compare(*min, val)) + { + // val is larger than min. Replace min with val. 
+ auto min_index = std::distance(topk.begin(), min); + topk[min_index] = val; + } + } + + return topk; + }; + + auto first_topk = find_topk(a); + auto second_topk = find_topk(b); + + uint32_t matched = 0; + for (uint32_t i = 0; i < _k; i++) + { + for (uint32_t j = 0; j < _k; j++) + { + if (first_topk[i].second == second_topk[j].second) + { + matched++; + break; + } + } + } + + float matched_ratio = static_cast<float>(matched) / _k; + + _intermediate.at(output_idx) += matched_ratio; +} + +bool TopKMatchPrinter::in_skip_list(uint32_t output_index) const +{ + for (auto skip : _skip_output) + { + if (output_index == skip) + return true; + } + + return false; +} + +void TopKMatchPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + if (in_skip_list(output_idx)) + continue; + + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 for ease of computation + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_topk_accuracy(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void TopKMatchPrinter::dump(std::ostream &os) const +{ + os << "Ratio of Matched Indices between Top-" << _k << " results of the models" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + if (in_skip_list(output_idx)) + continue; + + const auto name = _output_names.at(output_idx); + const auto sum_of_topk_accuracy = _intermediate.at(output_idx); + + // Compute TopKMatch + float mean_topk = sum_of_topk_accuracy / _num_data; + + os << "Mean Top-" << _k << " match ratio for " << name << " is " << mean_topk << 
std::endl; + } +} + +void MSEPrinter::init(const luci::Module *first, const luci::Module *second) +{ + THROW_UNLESS(first != nullptr, "Invalid module."); + THROW_UNLESS(second != nullptr, "Invalid module."); + + const auto first_output = loco::output_nodes(first->graph()); + const auto second_output = loco::output_nodes(second->graph()); + + assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < first_output.size(); i++) + { + const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]); + const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]); + + // Create tensors to store intermediate results + _intermediate.emplace_back(); + _intermediate.at(i).dtype(loco::DataType::FLOAT32); + // NOTE Use both first_node and second_node to avoid release build break + _intermediate.at(i).rank(first_node->rank()); + uint32_t num_elems = 1; + for (uint32_t j = 0; j < second_node->rank(); j++) + { + _intermediate.at(i).dim(j) = second_node->dim(j); + num_elems *= second_node->dim(j).value(); + } + _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems); + + // Check the buffer is initilized with zero + for (uint32_t j = 0; j < num_elems; j++) + assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0); + + // Save output names for logging + _output_names.emplace_back(first_node->name()); + } +} + +void MSEPrinter::accum_squared_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b) +{ + assert(a->dtype() == loco::DataType::FLOAT32 and + b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS + assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS + assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++) + { + _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) += + (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)) * + 
(a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)); + } +} + +void MSEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second) +{ + assert(first.size() == second.size()); // FIX_CALLER_UNLESS + assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto first_output = first[output_idx]; + const auto second_output = second[output_idx]; + + // Cast data to fp32 and then compute absolute error + const auto fp32_first_output = fp32(first_output); + const auto fp32_second_output = fp32(second_output); + + accum_squared_error(output_idx, fp32_first_output, fp32_second_output); + } + + _num_data++; +} + +void MSEPrinter::dump(std::ostream &os) const +{ + os << "Mean Squared Error (MSE)" << std::endl; + + for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++) + { + const auto name = _output_names.at(output_idx); + const auto &inter = _intermediate.at(output_idx); + assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS + const auto elem_count = inter.size<loco::DataType::FLOAT32>(); + + // Compute MSE + float mse = 0.0; + for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++) + mse += inter.at<loco::DataType::FLOAT32>(elem_idx); + + mse = mse / elem_count; + mse = mse / _num_data; + + os << "MSE for " << name << " is " << mse << std::endl; + } +} + } // namespace circle_eval_diff #undef THROW_UNLESS diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h index b51581c31..c8f27511c 100644 --- a/compiler/circle-eval-diff/src/MetricPrinter.h +++ b/compiler/circle-eval-diff/src/MetricPrinter.h @@ -85,6 +85,133 @@ private: uint32_t _num_data = 0; }; +// Mean Squared Error +class MSEPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module 
*second); + + void accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second); + + void dump(std::ostream &os) const; + +private: + void accum_squared_error(uint32_t index, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b); + +private: + // Store accumulated sum of absolute error for each output + std::vector<Tensor> _intermediate; + std::vector<std::string> _output_names; + uint32_t _num_data = 0; +}; + +// Mean Absolute Percentage Error +class MAPEPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second); + + void dump(std::ostream &os) const; + +private: + void accum_mean_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b); + +private: + // Store accumulated sum of absolute error for each output + std::vector<Tensor> _intermediate; + std::vector<std::string> _output_names; + uint32_t _num_data = 0; +}; + +// Mean Peak Error to Interval Ratio (PEIR) +// PEIR = max(|a - b|) / (max(a) - min(a)) +// PEIR >= 0 (lower is better) +// +// When testing the accuracy of quantized model, +// the first model should be the original fp32 model, and +// the second model should be the fake-quantized fp32 model +class MPEIRPrinter final : public MetricPrinter +{ +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second); + + void dump(std::ostream &os) const; + +private: + void accum_peir(uint32_t index, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b); + +private: + // Store accumulated sum of PEIR for each output + std::vector<float> _intermediate; + std::vector<std::string> _output_names; + uint32_t _num_data = 
0; +}; + +// Ratio of matched indices between top-k results of two models (a, b). +// +// top-k match = intersection(top_k_idx(a), top_k_idx(b)) / k +// mean top-k match = sum(top-k match) / num_data +// +// For example, +// num_data = 2 +// first model output = [1, 2, 3], [2, 3, 1] +// second model output = [2, 4, 6], [3, 2, 1] +// +// if k = 1, +// first model top-1 index = ([2], [1]) +// second model top-1 index = ([2], [0]) +// mean top-1 accuracy = (1 + 0) / 2 = 0.5 +// +// if k = 2, +// first model output = [1, 2, 3], [2, 3, 1] +// second model output = [2, 4, 6], [3, 2, 1] +// first model top-2 index = ([2, 1], [1, 0]) +// second model top-2 index = ([2, 1], [0, 1]) +// mean top-2 accuracy = (2 + 2) / 4 = 1 +// +// NOTE Order of elements is ignored when comparing two top-k sets. +// NOTE If two elements have the same value and only one can be included in top-k, +// the one with an earlier index will be included. +class TopKMatchPrinter : public MetricPrinter +{ +public: + TopKMatchPrinter(uint32_t k) : _k(k) {} + +public: + void init(const luci::Module *first, const luci::Module *second); + + void accumulate(const std::vector<std::shared_ptr<Tensor>> &first, + const std::vector<std::shared_ptr<Tensor>> &second); + + void dump(std::ostream &os) const; + +private: + void accum_topk_accuracy(uint32_t index, const std::shared_ptr<Tensor> &a, + const std::shared_ptr<Tensor> &b); + + // Return true if the output is in the skip list (_skip_output) + bool in_skip_list(uint32_t output_index) const; + +private: + const uint32_t _k = 0; + // Store accumulated accuracy + std::vector<float> _intermediate; + std::vector<std::string> _output_names; + uint32_t _num_data = 0; + // Save index of output whose num_elements is less than k + std::vector<uint32_t> _skip_output; +}; + } // namespace circle_eval_diff #endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__ diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp 
index 51ca89799..0e71b80cc 100644 --- a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp +++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp @@ -180,6 +180,23 @@ std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, flo return tensor; } +std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, + std::vector<float> &value) +{ + auto outputs = loco::output_nodes(module->graph()); + assert(outputs.size() == 1); + auto output = *outputs.begin(); + auto output_cnode = loco::must_cast<luci::CircleNode *>(output); + auto tensor = create_empty_tensor(output_cnode); + auto tensor_size = tensor->size<loco::DataType::FLOAT32>(); + assert(tensor_size == value.size()); + for (uint32_t i = 0; i < tensor_size; i++) + { + tensor->at<loco::DataType::FLOAT32>(i) = value[i]; + } + return tensor; +} + } // namespace namespace circle_eval_diff @@ -233,4 +250,299 @@ TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG) EXPECT_ANY_THROW(mae.init(nullptr, nullptr)); } +TEST(CircleEvalMetricPrinterTest, MAPE_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MAPEPrinter mape; + + mape.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + auto output = output_tensor_with_value(&first, 2.0); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + auto output = output_tensor_with_value(&second, 1.0); + second_result.emplace_back(output); + } + + mape.accumulate(first_result, second_result); + + std::stringstream ss; + mape.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MAPE for output_0 is 50%")); +} + +TEST(CircleEvalMetricPrinterTest, MAPE_init_with_null_NEG) +{ + MAPEPrinter mape; + + EXPECT_ANY_THROW(mape.init(nullptr, nullptr)); +} + +TEST(CircleEvalMetricPrinterTest, MPEIR_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MPEIRPrinter mpeir; + + mpeir.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + std::vector<float> val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i; + + auto output = output_tensor_with_value(&first, val); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + auto output = output_tensor_with_value(&second, 0.0); + second_result.emplace_back(output); + } + + mpeir.accumulate(first_result, second_result); + + std::stringstream ss; + mpeir.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MPEIR for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, MPEIR_init_with_null_NEG) +{ + MPEIRPrinter mpeir; + + EXPECT_ANY_THROW(mpeir.init(nullptr, nullptr)); +} + +TEST(CircleEvalMetricPrinterTest, TopK_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + TopKMatchPrinter top5(5); + + top5.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + std::vector<float> val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i; + + auto output = output_tensor_with_value(&first, val); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + std::vector<float> val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i * 2; + auto output = output_tensor_with_value(&second, val); + second_result.emplace_back(output); + } + + top5.accumulate(first_result, second_result); + + std::stringstream ss; + top5.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, TopK_tie) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + TopKMatchPrinter top5(5); + + top5.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + std::vector<float> val; + val.resize(16); + for (uint32_t i = 0; i < 16; i++) + val[i] = i; + + auto output = output_tensor_with_value(&first, val); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + std::vector<float> val{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 16}; + + auto output = output_tensor_with_value(&second, val); + second_result.emplace_back(output); + } + + top5.accumulate(first_result, second_result); + + std::stringstream ss; + top5.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 0.8")); +} + +TEST(CircleEvalMetricPrinterTest, TopK_num_elem_less_than_k_NEG) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + TopKMatchPrinter top100(100); + + top100.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + auto output = output_tensor_with_value(&first, 0); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + auto output = output_tensor_with_value(&second, 0); + second_result.emplace_back(output); + } + + top100.accumulate(first_result, second_result); + + std::stringstream ss; + top100.dump(ss); + std::string result = ss.str(); + + EXPECT_EQ(std::string::npos, result.find("Mean Top-100 match ratio")); +} + +TEST(CircleEvalMetricPrinterTest, TopK_init_with_null_NEG) +{ + TopKMatchPrinter topk(5); + + EXPECT_ANY_THROW(topk.init(nullptr, nullptr)); +} + +TEST(CircleEvalMetricPrinterTest, MSE_simple) +{ + luci::Module first; + AddOneGraph first_g; + first_g.init(); + + first.add(std::move(first_g.graph())); + + luci::Module second; + AddTwoGraph second_g; + second_g.init(); + + second.add(std::move(second_g.graph())); + + MSEPrinter mse; + + mse.init(&first, &second); + + // This test does not actually evaluate the modules, but create + // fake results. 
+ std::vector<std::shared_ptr<Tensor>> first_result; + { + auto output = output_tensor_with_value(&first, 1.0); + first_result.emplace_back(output); + } + + std::vector<std::shared_ptr<Tensor>> second_result; + { + auto output = output_tensor_with_value(&second, 2.0); + second_result.emplace_back(output); + } + + mse.accumulate(first_result, second_result); + + std::stringstream ss; + mse.dump(ss); + std::string result = ss.str(); + + EXPECT_NE(std::string::npos, result.find("MSE for output_0 is 1")); +} + +TEST(CircleEvalMetricPrinterTest, MSE_init_with_null_NEG) +{ + MSEPrinter mse; + + EXPECT_ANY_THROW(mse.init(nullptr, nullptr)); +} + } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp deleted file mode 100644 index 85f985873..000000000 --- a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ModuleEvalDiff.h" -#include "Tensor.h" - -#include <luci_interpreter/Interpreter.h> -#include <dio_hdf5/HDF5Importer.h> - -#include <string> -#include <stdexcept> -#include <iostream> -#include <cassert> - -using Tensor = circle_eval_diff::Tensor; -using DataType = loco::DataType; -using Shape = std::vector<loco::Dimension>; -using HDF5Importer = dio::hdf5::HDF5Importer; - -namespace -{ - -// Check the type and the shape of CircleInput -void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape) -{ - // Type check - if (dtype != input_node->dtype()) - throw std::runtime_error("Wrong input type."); - - if (shape.size() != input_node->rank()) - throw std::runtime_error("Input rank mismatch."); - - for (uint32_t i = 0; i < shape.size(); i++) - { - if (not(shape.at(i) == input_node->dim(i))) - throw std::runtime_error("Input shape mismatch."); - } -} - -// Return number of elements of the node. -uint32_t numElements(const luci::CircleNode *node) -{ - uint32_t num_elem = 1; - for (uint32_t i = 0; i < node->rank(); ++i) - num_elem *= node->dim(i).value(); - return num_elem; -} - -// Return Tensor which has the same dtype and shape with node. -// Buffer does not have any data yet. 
-std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node) -{ - auto tensor = std::make_shared<Tensor>(); - { - tensor->dtype(node->dtype()); - tensor->rank(node->rank()); - for (uint32_t i = 0; i < node->rank(); i++) - tensor->dim(i) = node->dim(i); - - switch (node->dtype()) - { - case loco::DataType::FLOAT32: - tensor->size<loco::DataType::FLOAT32>(numElements(node)); - break; - case loco::DataType::U8: - tensor->size<loco::DataType::U8>(numElements(node)); - break; - case loco::DataType::S16: - tensor->size<loco::DataType::S16>(numElements(node)); - break; - case loco::DataType::S32: - tensor->size<loco::DataType::S32>(numElements(node)); - break; - case loco::DataType::S64: - tensor->size<loco::DataType::S64>(numElements(node)); - break; - default: - throw std::runtime_error("Unsupported input tensor dtype for " + node->name()); - } - } - - return tensor; -} - -} // namespace - -namespace circle_eval_diff -{ - -void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const -{ - const auto interp = std::make_unique<luci_interpreter::Interpreter>(_first_module.get()); - - _metric->init(_first_module.get(), _second_module.get()); - - try - { - HDF5Importer first_h5(first_input_data_path); - first_h5.importGroup("value"); - - HDF5Importer second_h5(second_input_data_path); - second_h5.importGroup("value"); - - const auto first_num_data = first_h5.numData(); - const auto second_num_data = second_h5.numData(); - - if (first_num_data != second_num_data) - throw std::runtime_error( - "Number of data in the first data file and the second data file mismatches."); - - if (first_num_data == 0) - throw std::runtime_error("Input data file does not contain any record."); - - const auto first_input_nodes = loco::input_nodes(_first_module->graph()); - const auto first_num_inputs = first_input_nodes.size(); - const auto first_output_nodes = loco::output_nodes(_first_module->graph()); - const auto 
first_num_outputs = first_output_nodes.size(); - - const auto second_input_nodes = loco::input_nodes(_second_module->graph()); - const auto second_num_inputs = second_input_nodes.size(); - const auto second_output_nodes = loco::output_nodes(_second_module->graph()); - const auto second_num_outputs = second_output_nodes.size(); - - for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++) - { - std::cout << "Evaluating " << data_idx << "'th data" << std::endl; - - if (first_num_inputs != first_h5.numInputs(data_idx) || - second_num_inputs != second_h5.numInputs(data_idx)) - throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) + - "th data."); - - // Do inference and return output - auto eval = [&](HDF5Importer &h5, uint32_t num_inputs, - const std::vector<loco::Node *> &input_nodes, uint32_t num_outputs, - const std::vector<loco::Node *> &output_nodes) { - // Write input data - for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++) - { - const auto *input_node = - loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]); - assert(input_node->index() == input_idx); - - auto tensor = createEmptyTensor(input_node); - if (h5.isRawData()) - { - h5.readTensor(data_idx, input_idx, tensor->buffer()); - } - else - { - DataType dtype; - Shape shape; - h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer()); - - // Check the type and the shape of the input data is valid - verifyTypeShape(input_node, dtype, shape); - } - - interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size()); - } - - // Interpret - interp->interpret(); - - // Read output data - std::vector<std::shared_ptr<Tensor>> outputs; - for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++) - { - const auto *output_node = - loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]); - assert(output_node->index() == output_idx); - - auto tensor = createEmptyTensor(output_node); - 
interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size()); - outputs.emplace_back(tensor); - } - - return outputs; - }; - - auto first_output = - eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes); - auto second_output = eval(second_h5, second_num_inputs, second_input_nodes, - second_num_outputs, second_output_nodes); - - // Accumulate diffs - _metric->accumulate(first_output, second_output); - } - - std::cout << "Evaluation finished. Number of data: " << first_num_data << std::endl; - } - catch (const H5::Exception &e) - { - H5::Exception::printErrorStack(); - throw std::runtime_error("HDF5 error occurred."); - } - - // Print metric - std::cout << _metric.get() << std::endl; -} - -} // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h deleted file mode 100644 index c7642f60b..000000000 --- a/compiler/circle-eval-diff/src/ModuleEvalDiff.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ -#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ - -#include "MetricPrinter.h" - -#include <luci/IR/Module.h> - -#include <memory> - -namespace circle_eval_diff -{ - -class ModuleEvalDiff -{ -public: - ModuleEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second, - std::unique_ptr<MetricPrinter> &&metric) - : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric)) - { - } - - virtual ~ModuleEvalDiff() = default; - - // Implement this in the child class - virtual void evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const = 0; - -protected: - std::unique_ptr<luci::Module> _first_module; - std::unique_ptr<luci::Module> _second_module; - std::unique_ptr<MetricPrinter> _metric; -}; - -class H5InputEvalDiff final : public ModuleEvalDiff -{ -public: - H5InputEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second, - std::unique_ptr<MetricPrinter> &&metric) - : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric)) - { - } - - void evalDiff(const std::string &first_input_data_path, - const std::string &second_input_data_path) const; -}; - -// TODO Implement ModuleEvalDiff for random input and directory input - -} // namespace circle_eval_diff - -#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__ diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp index 6710e8c3d..c3efc44cd 100644 --- a/compiler/circle-eval-diff/src/Tensor.cpp +++ b/compiler/circle-eval-diff/src/Tensor.cpp @@ -16,8 +16,24 @@ #include "Tensor.h" +#include <luci/IR/CircleNodeDecl.h> + #include <cassert> +namespace +{ + +// Return number of elements of the node. 
+uint32_t numElements(const luci::CircleNode *node) +{ + uint32_t num_elem = 1; + for (uint32_t i = 0; i < node->rank(); ++i) + num_elem *= node->dim(i).value(); + return num_elem; +} + +} // namespace + namespace circle_eval_diff { @@ -69,4 +85,40 @@ INSTANTIATE(loco::DataType::FLOAT32); #undef INSTANTIATE +// Return Tensor which has the same dtype and shape with node. +// Buffer does not have any data yet. +std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node) +{ + auto tensor = std::make_shared<Tensor>(); + { + tensor->dtype(node->dtype()); + tensor->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + tensor->dim(i) = node->dim(i); + + switch (node->dtype()) + { + case loco::DataType::FLOAT32: + tensor->size<loco::DataType::FLOAT32>(numElements(node)); + break; + case loco::DataType::U8: + tensor->size<loco::DataType::U8>(numElements(node)); + break; + case loco::DataType::S16: + tensor->size<loco::DataType::S16>(numElements(node)); + break; + case loco::DataType::S32: + tensor->size<loco::DataType::S32>(numElements(node)); + break; + case loco::DataType::S64: + tensor->size<loco::DataType::S64>(numElements(node)); + break; + default: + throw std::runtime_error("Unsupported input tensor dtype for " + node->name()); + } + } + + return tensor; +} + } // namespace circle_eval_diff diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h index 65ab60638..d4f65d951 100644 --- a/compiler/circle-eval-diff/src/Tensor.h +++ b/compiler/circle-eval-diff/src/Tensor.h @@ -18,6 +18,7 @@ #define __CIRCLE_EVAL_DIFF_TENSOR_H__ #include <loco.h> +#include <luci/IR/CircleNodeDecl.h> #include <vector> @@ -76,6 +77,8 @@ private: std::vector<uint8_t> _data; }; +std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node); + } // namespace circle_eval_diff #endif // __CIRCLE_EVAL_DIFF_TENSOR_H__ diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp 
index 3bdeaecdf..395865748 100644 --- a/compiler/circle-eval-diff/src/Tensor.test.cpp +++ b/compiler/circle-eval-diff/src/Tensor.test.cpp @@ -18,6 +18,8 @@ #include <gtest/gtest.h> +#include <luci/IR/CircleNodes.h> + using Tensor = circle_eval_diff::Tensor; namespace @@ -99,3 +101,29 @@ TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG) SUCCEED(); } + +TEST(CircleEvalDiffTensorTest, createEmptyTensorTest) +{ + luci::CircleInput input; + input.dtype(loco::DataType::FLOAT32); + input.rank(4); + input.dim(0).set(1); + input.dim(1).set(3); + input.dim(2).set(3); + input.dim(3).set(2); + + loco::DataType right_data_type{loco::DataType::FLOAT32}; + std::vector<loco::Dimension> right_shape; + right_shape.emplace_back(1); + right_shape.emplace_back(3); + right_shape.emplace_back(3); + right_shape.emplace_back(2); + + auto tensor = circle_eval_diff::createEmptyTensor(&input); + EXPECT_EQ(loco::DataType::FLOAT32, tensor->dtype()); + EXPECT_EQ(4, tensor->rank()); + EXPECT_EQ(1, tensor->dim(0)); + EXPECT_EQ(3, tensor->dim(1)); + EXPECT_EQ(3, tensor->dim(2)); + EXPECT_EQ(2, tensor->dim(3)); +} diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt index 2f657c171..da74e021d 100644 --- a/compiler/circle-execution-plan/CMakeLists.txt +++ b/compiler/circle-execution-plan/CMakeLists.txt @@ -1,3 +1,9 @@ +nnas_find_package(Jsoncpp) +if(NOT Jsoncpp_FOUND) + message(STATUS "Build circle-execution-plan: FAILED (missing jsoncpp)") + return() +endif(NOT Jsoncpp_FOUND) + set(SOURCES pal/IScratchpadHelper.h pal/ScratchpadHelperLinux.h @@ -10,6 +16,9 @@ set(SOURCES ) add_executable(circle_execution_plan "${SOURCES}") +target_include_directories(circle_execution_plan PRIVATE ${Jsoncpp_INCLUDE_DIRS}) + +target_link_libraries(circle_execution_plan ${Jsoncpp_STATIC_LIB}) target_link_libraries(circle_execution_plan foder) target_link_libraries(circle_execution_plan safemain) target_link_libraries(circle_execution_plan luci_env) diff --git 
a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp index 1788124c3..d5ddf0ce9 100644 --- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp +++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp @@ -33,20 +33,22 @@ int entry(int argc, char **argv) { arser::Arser arser("circle_execution_plan provides model with execution plan meta information"); - arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); - arser.add_argument("--platform") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .default_value("linux") - .help("Platform name: linux mcu cmsisnn"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); + arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn"); arser.add_argument("--use_dsp") .nargs(1) .type(arser::DataType::BOOL) .required(false) .default_value(false) .help("Plan with or without dsp (now can be used only with cmsisnn)"); + arser.add_argument("--save_allocations") + .nargs(1) + .required(false) + .default_value("") + .help("Path for output JSON file to save memory allocation info. 
" + "Note: path end of file should have 'tracealloc.json' (example path: " + "'../exec_plan_info.tracealloc.json')"); try { @@ -63,6 +65,7 @@ int entry(int argc, char **argv) const std::string output_path = arser.get<std::string>("output"); const std::string platform_name = arser.get<std::string>("--platform"); const bool use_dsp = arser.get<bool>("--use_dsp"); + const std::string json_path = arser.get<std::string>("--save_allocations"); if (platform_name != "cmsisnn" && use_dsp) { @@ -89,6 +92,13 @@ int entry(int argc, char **argv) return EXIT_FAILURE; } + bool is_save_allocations = false; + + if (!json_path.empty()) + { + is_save_allocations = true; + } + foder::FileLoader file_loader{input_path}; std::vector<char> model_data; @@ -124,6 +134,9 @@ int entry(int argc, char **argv) circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp}); execution_planner.make_execution_plan(); + if (is_save_allocations) + execution_planner.create_json_allocation_file(json_path); + // Export to output Circle file luci::CircleExporter exporter; luci::CircleFileExpContract contract(module.get(), output_path); diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp index ec2ec1362..a1e6f7e1a 100644 --- a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp @@ -18,6 +18,9 @@ #include <loco/IR/Algorithm.h> #include <luci/UserSettings.h> +#include <json.h> +#include <fstream> + namespace circle_planner { namespace @@ -58,6 +61,29 @@ bool isTensorProducingNode(const luci::CircleNode *node) } } +// Create allocation node part for current circle node for json allocation info file +void create_allocation_node(Json::Value &allocations_node, + AllocationNodeInformation &alloca_node_inform, uint32_t alive_till_max, + luci::CircleNode *circle_node) +{ + Json::Value allocation_node; + if (alloca_node_inform.size == 0) + 
return; + + allocation_node["offset"] = alloca_node_inform.offset; + allocation_node["size"] = alloca_node_inform.size; + allocation_node["alive_from"] = alloca_node_inform.first_node; + + if (alloca_node_inform.last_node == node_not_assigned) + allocation_node["alive_till"] = alive_till_max + 1; + else + allocation_node["alive_till"] = alloca_node_inform.last_node; + + allocation_node["origin"] = circle_node->name(); + + allocations_node.append(allocation_node); +} + } // namespace void ExecutionPlanner::make_execution_plan() @@ -74,6 +100,50 @@ void ExecutionPlanner::make_execution_plan() settings->set(luci::UserSettings::Key::ExecutionPlanGen, true); } +void ExecutionPlanner::create_json_allocation_file(const std::string &json_path) +{ + Json::Value main_tree; + Json::Value segments_node; + Json::Value allocations_node; + + uint32_t alive_till_max = 0; + + // Find max dealloc value to assign to nodes with node_not_assigned value + for (const auto elem : _dealloc_node) + { + if (alive_till_max < elem and elem != node_not_assigned) + alive_till_max = elem; + } + + for (auto &alloc_node_inform : _alloc_node_inform_vector) + { + const auto node_num = alloc_node_inform.node_num; + const auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[node_num]); + + create_allocation_node(allocations_node, alloc_node_inform, alive_till_max, circle_node); + } + + // Create segment part + Json::Value segment_node; + segment_node["name"] = "Segment1"; + segment_node["allocations"] = allocations_node; + segments_node.append(segment_node); + + main_tree["schema_version"] = 1; + main_tree["segments"] = segments_node; + + Json::StreamWriterBuilder builder; + const std::unique_ptr<Json::StreamWriter> writer(builder.newStreamWriter()); + + // Write to json file + std::ofstream out; + out.open(json_path); + if (out.is_open()) + { + writer->write(main_tree, &out); + } +} + void ExecutionPlanner::get_default_execution_order_plan() { // Get execution order in _ordered_nodes 
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h index e0833c407..af3fba33e 100644 --- a/compiler/circle-execution-plan/src/ExecutionPlanner.h +++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h @@ -104,6 +104,8 @@ public: _is_null_scratchpads = is_null_scratchpads; }; + void create_json_allocation_file(const std::string &json_path); + private: // Method gets default execution order plan and saves it in _ordered_nodes vector. // There can be different variants of execution order and this method provides main one. diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp index 10e185de5..318a5826b 100644 --- a/compiler/circle-inspect/driver/Driver.cpp +++ b/compiler/circle-inspect/driver/Driver.cpp @@ -36,7 +36,7 @@ int entry(int argc, char **argv) .help("Dump Conv2D series weight operators in circle file"); arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file"); arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors"); - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect"); + arser.add_argument("circle").help("Circle file to inspect"); try { diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake index 362d67cf4..183dfe227 100644 --- a/compiler/circle-inspect/requires.cmake +++ b/compiler/circle-inspect/requires.cmake @@ -1,3 +1,4 @@ require("arser") +require("foder") require("mio-circle04") require("safemain") diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp index bba5e56c3..aa8fed248 100644 --- a/compiler/circle-inspect/src/Dump.cpp +++ b/compiler/circle-inspect/src/Dump.cpp @@ -15,7 +15,9 @@ */ #include "Dump.h" -#include "Reader.h" + +#include <mio_circle/Helper.h> +#include <mio_circle/Reader.h> #include <ostream> @@ -24,7 +26,7 @@ namespace circleinspect void 
DumpOperators::run(std::ostream &os, const circle::Model *model) { - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); const uint32_t subgraph_size = reader.num_subgraph(); @@ -50,7 +52,7 @@ void DumpOperators::run(std::ostream &os, const circle::Model *model) namespace { -const circle::Operator *operator_match_output(circleinspect::Reader &reader, const int32_t tensor) +const circle::Operator *operator_match_output(mio::circle::Reader &reader, const int32_t tensor) { auto ops = reader.operators(); @@ -58,7 +60,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con { const auto op = ops->Get(i); - const std::vector<int32_t> &outputs = circleinspect::as_index_vector(op->outputs()); + const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs()); for (auto output : outputs) { @@ -69,7 +71,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con return nullptr; } -size_t tensor_buffer_size(circleinspect::Reader &reader, const int32_t tensor_id) +size_t tensor_buffer_size(mio::circle::Reader &reader, const int32_t tensor_id) { auto tensors = reader.tensors(); @@ -93,7 +95,7 @@ namespace circleinspect void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model) { - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); const uint32_t subgraph_size = reader.num_subgraph(); @@ -110,7 +112,7 @@ void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model) if (bc == circle::BuiltinOperator_CONV_2D || bc == circle::BuiltinOperator_DEPTHWISE_CONV_2D) { - const std::vector<int32_t> &inputs = circleinspect::as_index_vector(op->inputs()); + const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs()); if (inputs.size() < 2) { throw std::runtime_error("Operator has invalid input"); @@ -147,7 +149,7 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model) { std::map<std::string, int32_t> 
op_version_map; - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); // This assert is subject to be changed later assert(reader.num_subgraph() == 1); @@ -181,7 +183,7 @@ namespace circleinspect void DumpTensorDType::run(std::ostream &os, const circle::Model *model) { - circleinspect::Reader reader(model); + mio::circle::Reader reader(model); const uint32_t subgraph_size = reader.num_subgraph(); diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h deleted file mode 100644 index c38ec3990..000000000 --- a/compiler/circle-inspect/src/Reader.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __READER_H__ -#define __READER_H__ - -#include <mio/circle/schema_generated.h> - -#include <map> -#include <string> -#include <vector> - -namespace circleinspect -{ - -template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array) -{ - std::vector<T> ret(flat_array->Length()); - for (uint32_t i = 0; i < flat_array->Length(); i++) - { - ret[i] = flat_array->Get(i); - } - return ret; -} - -/** - * @brief Loads Circle file and provides helpers to access attributes - */ -class Reader -{ -private: - using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>; - using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>; - using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>; - using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>; - -public: - Reader(const circle::Model *model); - - Reader() = delete; - -public: - const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; } - const CircleBuffers_t *buffers() { return _buffers; } - const CircleTensors_t *tensors() { return _tensors; } - const CircleOperators_t *operators() { return _operators; } - const std::vector<int32_t> &inputs() const { return _inputs; } - const std::vector<int32_t> &outputs() const { return _outputs; } - - uint32_t num_subgraph() const { return _subgraphs->Length(); } - - size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); - circle::BuiltinOperator builtin_code(const circle::Operator *op) const; - std::string opcode_name(const circle::Operator *op) const; - std::string tensor_name(const circle::Tensor *tensor) const; - std::string tensor_dtype(const circle::Tensor *tensor) const; - -public: - bool select_subgraph(uint32_t subgraph); - -private: - const CircleSubGraphs_t *_subgraphs{nullptr}; - const CircleBuffers_t *_buffers{nullptr}; - const CircleTensors_t *_tensors{nullptr}; - const CircleOperators_t 
*_operators{nullptr}; - - std::vector<const circle::OperatorCode *> _op_codes; - std::vector<int32_t> _inputs; - std::vector<int32_t> _outputs; -}; - -} // namespace circleinspect - -#endif // __READER_H__ diff --git a/compiler/circle-interpreter/CMakeLists.txt b/compiler/circle-interpreter/CMakeLists.txt new file mode 100644 index 000000000..d18db3e11 --- /dev/null +++ b/compiler/circle-interpreter/CMakeLists.txt @@ -0,0 +1,13 @@ +set(INTERPRETER + src/CircleInterpreter.cpp + ) + +add_executable(circle-interpreter ${INTERPRETER}) +target_link_libraries(circle-interpreter PRIVATE arser) +target_link_libraries(circle-interpreter PRIVATE loco) +target_link_libraries(circle-interpreter PRIVATE luci_import) +target_link_libraries(circle-interpreter PRIVATE luci_interpreter) +target_link_libraries(circle-interpreter PRIVATE safemain) +target_link_libraries(circle-interpreter PRIVATE vconone) + +install(TARGETS circle-interpreter DESTINATION bin) diff --git a/compiler/circle-interpreter/requires.cmake b/compiler/circle-interpreter/requires.cmake new file mode 100644 index 000000000..a565df65b --- /dev/null +++ b/compiler/circle-interpreter/requires.cmake @@ -0,0 +1,6 @@ +require("arser") +require("loco") +require("luci") +require("luci-interpreter") +require("safemain") +require("vconone") diff --git a/compiler/circle-interpreter/src/CircleInterpreter.cpp b/compiler/circle-interpreter/src/CircleInterpreter.cpp new file mode 100644 index 000000000..1d241278d --- /dev/null +++ b/compiler/circle-interpreter/src/CircleInterpreter.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <arser/arser.h> +#include <luci/ImporterEx.h> +#include <luci_interpreter/Interpreter.h> +#include <vconone/vconone.h> + +#include <cstdlib> +#include <fstream> +#include <vector> +#include <string> + +namespace +{ + +void readDataFromFile(const std::string &filename, char *data, size_t data_size) +{ + std::ifstream fs(filename, std::ifstream::binary); + if (fs.fail()) + throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); + if (fs.read(data, data_size).fail()) + throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n"); +} + +void writeDataToFile(const std::string &filename, const char *data, size_t data_size) +{ + std::ofstream fs(filename, std::ofstream::binary); + if (fs.fail()) + throw std::runtime_error("Cannot open file \"" + filename + "\".\n"); + if (fs.write(data, data_size).fail()) + { + throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n"); + } +} + +template <typename NodeT> size_t getTensorSize(const NodeT *node) +{ + uint32_t tensor_size = loco::size(node->dtype()); + for (uint32_t i = 0; i < node->rank(); ++i) + tensor_size *= node->dim(i).value(); + return tensor_size; +} + +void print_version(void) +{ + std::cout << "circle-interpreter version " << vconone::get_string() << std::endl; + std::cout << vconone::get_copyright() << std::endl; +} + +} // namespace + +/* + * @brief CircleInterpreter main + * + * Driver to invoke luci-interpreter + * + */ +int entry(int argc, char **argv) +{ + arser::Arser arser("Interpreter driver for circle models"); + + 
arser::Helper::add_version(arser, print_version); + + arser.add_argument("model_path").help("Circle model filepath"); + arser.add_argument("input_prefix") + .help("Input data filepath for circle model. " + "n-th input data is read from ${input_prefix}n, " + "for example, Add.circle.input0, Add.circle.input1"); + arser.add_argument("output_prefix") + .help("Output data filepath for circle model. " + "Output data is written in ${output_file}n, " + "for example, Add.circle.output0"); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cout << err.what() << std::endl; + std::cout << arser; + return EXIT_FAILURE; + } + + const auto filename = arser.get<std::string>("model_path"); + const auto input_prefix = arser.get<std::string>("input_prefix"); + const auto output_prefix = arser.get<std::string>("output_prefix"); + + // Load model from the file + luci::ImporterEx importer; + std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename); + if (module == nullptr) + { + std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl; + return EXIT_FAILURE; + } + + // Create interpreter. + luci_interpreter::Interpreter interpreter(module.get()); + + // Set input. + // Data for n'th input is read from ${input_prefix}n + // (ex: Add.circle.input0, Add.circle.input1 ..) + const auto input_nodes = loco::input_nodes(module->graph()); + for (int32_t i = 0; i < input_nodes.size(); i++) + { + const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]); + std::vector<char> input_data(getTensorSize(input_node)); + readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(), + input_data.size()); + interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + } + + // Do inference. + interpreter.interpret(); + + // Get output. 
+ const auto output_nodes = loco::output_nodes(module->graph()); + for (int i = 0; i < module->graph()->outputs()->size(); i++) + { + const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]); + std::vector<char> output_data(getTensorSize(output_node)); + interpreter.readOutputTensor(output_node, output_data.data(), output_data.size()); + + // Output data is written in ${output_file}n + // (ex: Add.circle.output0) + writeDataToFile(std::string(output_prefix) + std::to_string(i), output_data.data(), + output_data.size()); + } + return EXIT_SUCCESS; +} diff --git a/compiler/circle-operator-test/CMakeLists.txt b/compiler/circle-operator-test/CMakeLists.txt new file mode 100644 index 000000000..2ebd533b9 --- /dev/null +++ b/compiler/circle-operator-test/CMakeLists.txt @@ -0,0 +1,18 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR) +get_target_property(CIRCLE_OPERATOR_PATH circle-operator BINARY_DIR) +set(CIRCLE_OPERATOR_PATH "${CIRCLE_OPERATOR_PATH}/circle-operator") + +nnas_find_package(GTest REQUIRED) + +file(GLOB_RECURSE TESTS "src/*.test.cpp") + +GTest_AddTest(circle-operator-test ${TESTS}) + +set_tests_properties(circle-operator-test + PROPERTIES + ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_OPERATOR_PATH=${CIRCLE_OPERATOR_PATH}" + ) diff --git a/compiler/circle-operator-test/README.md b/compiler/circle-operator-test/README.md new file mode 100644 index 000000000..d07c64d2e --- /dev/null +++ b/compiler/circle-operator-test/README.md @@ -0,0 +1,7 @@ +# circle-operator-test + +_circle-operator-test_ provides test of circle-operator tool is working as expected. 
+ +Current tests includes +- input arguments test is working as expected +- output of this tool is as expected diff --git a/compiler/circle-operator-test/requires.cmake b/compiler/circle-operator-test/requires.cmake new file mode 100644 index 000000000..8ad3b8a64 --- /dev/null +++ b/compiler/circle-operator-test/requires.cmake @@ -0,0 +1,2 @@ +require("circle-operator") +require("common-artifacts") diff --git a/compiler/circle-operator-test/src/circle-operator.test.cpp b/compiler/circle-operator-test/src/circle-operator.test.cpp new file mode 100644 index 000000000..29c6f3792 --- /dev/null +++ b/compiler/circle-operator-test/src/circle-operator.test.cpp @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include <cstdlib> +#include <fstream> +#include <vector> + +class cirlce_operator_test : public ::testing::Test +{ +protected: + bool initialize(void); + bool run(const std::string &command); + +protected: + bool load(const std::string &file); + +protected: + std::string _artifacts_path; + std::string _circle_operator_path; + std::string _result; +}; + +bool cirlce_operator_test::initialize(void) +{ + char *path = std::getenv("ARTIFACTS_PATH"); + if (path == nullptr) + { + std::cerr << "ARTIFACTS_PATH not found" << std::endl; + return false; + } + _artifacts_path = path; + + path = std::getenv("CIRCLE_OPERATOR_PATH"); + if (path == nullptr) + { + std::cerr << "ARTIFACTS_BIN_PATH not found" << std::endl; + return false; + } + _circle_operator_path = path; + + return true; +} + +bool cirlce_operator_test::run(const std::string &command) +{ + std::vector<char> buffer(260); + std::string result = ""; + std::string cmd_err = command + " 2>&1"; + FILE *pipe = popen(cmd_err.c_str(), "r"); + if (!pipe) + { + return false; + } + try + { + while (fgets(&buffer[0], buffer.size(), pipe) != NULL) + { + result += &buffer[0]; + } + } + catch (...) 
+ { + pclose(pipe); + return false; + } + pclose(pipe); + _result = result; + + std::cout << _result << std::endl; + + return true; +} + +bool cirlce_operator_test::load(const std::string &file) +{ + std::ifstream tmp(file.c_str()); + if (tmp.fail()) + return false; + + std::stringstream buffer; + buffer << tmp.rdbuf(); + _result = buffer.str(); + return true; +} + +TEST_F(cirlce_operator_test, valid_names) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ofm"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, valid_codes) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --code " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ADD"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, invalid_option_NEG) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --opname " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("Invalid argument"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, check_code_name) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --code --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ofm"); + ASSERT_NE(std::string::npos, pos); + const auto pos2 = _result.find("ADD"); + ASSERT_NE(std::string::npos, pos2); +} + +TEST_F(cirlce_operator_test, nonexist_file_NEG) +{ + if (!initialize()) + { + FAIL(); + 
return; + } + + std::string model = _artifacts_path + "/non_exist_file.foo"; + std::string command = _circle_operator_path + " --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ERROR"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, invalid_file_NEG) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string model = _artifacts_path + "/Add_000.recipe"; + std::string command = _circle_operator_path + " --name " + model; + if (!run(command)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ERROR"); + ASSERT_NE(std::string::npos, pos); +} + +TEST_F(cirlce_operator_test, output_file) +{ + if (!initialize()) + { + FAIL(); + return; + } + + std::string fileName("/tmp/a.txt"); + std::remove(fileName.c_str()); + std::string model = _artifacts_path + "/Add_000.circle"; + std::string command = _circle_operator_path + " --code --output_path " + fileName + " " + model; + if (!run(command)) + { + FAIL(); + return; + } + if (!load(fileName)) + { + FAIL(); + return; + } + + const auto pos = _result.find("ADD"); + ASSERT_NE(std::string::npos, pos); +} diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt new file mode 100644 index 000000000..6817a8618 --- /dev/null +++ b/compiler/circle-operator/CMakeLists.txt @@ -0,0 +1,17 @@ +if(NOT TARGET mio_circle04) + return() +endif(NOT TARGET mio_circle04) + +set(DRIVER "driver/Driver.cpp") + +file(GLOB_RECURSE SOURCES "src/*.cpp") + +add_executable(circle-operator ${DRIVER} ${SOURCES}) +target_include_directories(circle-operator PRIVATE src) +target_link_libraries(circle-operator arser) +target_link_libraries(circle-operator foder) +target_link_libraries(circle-operator mio_circle04) +target_link_libraries(circle-operator mio_circle04_helper) +target_link_libraries(circle-operator safemain) + +install(TARGETS circle-operator DESTINATION bin) diff --git a/compiler/circle-operator/README.md 
b/compiler/circle-operator/README.md new file mode 100644 index 000000000..86a923f05 --- /dev/null +++ b/compiler/circle-operator/README.md @@ -0,0 +1,70 @@ +# circle-operator + +_circle-operator_ allows users to retrieve operators information from a Circle model file + +NOTE: this tool is primary for ONE-vscode where PartEditor needs names and codes +of the operators. + +## Information with operators + +Operators with `--name` +- show operator names one line at a time in execution order + +Example +``` +$ circle-operator --name model.circle +``` + +Result +``` +conv1_pad/Pad +conv1_conv/BiasAdd +pool1_pad/Pad +``` + +Operators codes with `--code` +- show operator codes one line at a time in execution order + +Example +``` +$ circle-operator --code model.circle +``` + +Result +``` +PAD +CONV_2D +PAD +``` + +Operators with both `--code` and `--name` +- show operator both codes and name separated with `,` one line at a time in execution order + +Example +``` +$ circle-operator --code --name model.circle +``` + +Result +``` +PAD,conv1_pad/Pad +CONV_2D,conv1_conv/BiasAdd +PAD,pool1_pad/Pad +``` + +## Save to file + +Use `--output_path` to save results to a file. + +Example +``` +$ circle-operator --name --output_path /tmp/result model.circle +``` + +Result +``` +$ cat /tmp/result +conv1_pad/Pad +conv1_conv/BiasAdd +pool1_pad/Pad +``` diff --git a/compiler/circle-operator/driver/Driver.cpp b/compiler/circle-operator/driver/Driver.cpp new file mode 100644 index 000000000..f5fd8073c --- /dev/null +++ b/compiler/circle-operator/driver/Driver.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dump.h" + +#include <arser/arser.h> +#include <foder/FileLoader.h> +#include <fstream> + +#include <functional> +#include <iostream> +#include <map> +#include <memory> +#include <vector> +#include <string> + +#include <signal.h> + +void handle_segfault(int signal, siginfo_t *si, void *arg) +{ + std::cerr << "ERROR: Failed to load file" << std::endl; + exit(255); +} + +int entry(int argc, char **argv) +{ + // TODO add option to dump for all sub-graphs + arser::Arser arser{ + "circle-operator allows users to retrieve operator information from a Circle model file"}; + arser.add_argument("--name").nargs(0).help("Dump operators name in circle file"); + arser.add_argument("--code").nargs(0).help("Dump operators code in circle file"); + arser.add_argument("--output_path").help("Save output to file (default output is console)"); + arser.add_argument("circle").help("Circle file to dump"); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + std::cerr << arser; + return 255; + } + + cirops::DumpOption option; + option.names = arser["--name"]; + option.codes = arser["--code"]; + + std::ofstream oFstream; + std::ostream *oStream = &std::cout; + if (arser["--output_path"]) + { + auto output_path = arser.get<std::string>("--output_path"); + oFstream.open(output_path, std::ofstream::out | std::ofstream::trunc); + if (oFstream.fail()) + { + std::cerr << "ERROR: Failed to create output to file " << output_path << std::endl; + return 255; + } + oStream = &oFstream; + } + + 
// hook segment fault + struct sigaction sa; + memset(&sa, 0, sizeof(struct sigaction)); + sigemptyset(&sa.sa_mask); + sa.sa_sigaction = handle_segfault; + sa.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &sa, NULL); + + std::string modelFile = arser.get<std::string>("circle"); + // Load Circle model from a circle file + try + { + foder::FileLoader fileLoader{modelFile}; + std::vector<char> modelData = fileLoader.load(); + const circle::Model *circleModel = circle::GetModel(modelData.data()); + if (circleModel == nullptr) + { + std::cerr << "ERROR: Failed to load circle '" << modelFile << "'" << std::endl; + return 255; + } + cirops::DumpOperators dump; + dump.run(*oStream, circleModel, option); + } + catch (const std::runtime_error &err) + { + std::cerr << "ERROR: " << err.what() << std::endl; + return 255; + } + + if (oFstream.is_open()) + { + oFstream.close(); + } + + return 0; +} diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake new file mode 100644 index 000000000..183dfe227 --- /dev/null +++ b/compiler/circle-operator/requires.cmake @@ -0,0 +1,4 @@ +require("arser") +require("foder") +require("mio-circle04") +require("safemain") diff --git a/compiler/circle-operator/src/Dump.cpp b/compiler/circle-operator/src/Dump.cpp new file mode 100644 index 000000000..36bfe8632 --- /dev/null +++ b/compiler/circle-operator/src/Dump.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Dump.h" + +#include <mio_circle/Helper.h> +#include <mio_circle/Reader.h> + +#include <ostream> + +namespace +{ + +void dump_ops(std::ostream &os, mio::circle::Reader &reader, const cirops::DumpOption &option) +{ + auto ops = reader.operators(); + for (uint32_t i = 0; i < ops->Length(); ++i) + { + const auto op = ops->Get(i); + const auto op_name = reader.opcode_name(op); + + if (option.all_graphs) + { + // NOTE all_graphs is false for now + // TODO check using '$' as split key + os << i << "$"; + } + + if (option.codes) + { + const auto op_name = reader.opcode_name(op); + os << op_name; + } + if (option.names) + { + // TODO multiple outputs? + const auto tensors = reader.tensors(); + const auto output_tensors = reader.outputs(op); + const auto output = output_tensors.at(0); + const auto tensor = tensors->Get(output); + const std::string name = mio::circle::tensor_name(tensor); + if (option.codes) + { + os << ","; + } + os << name; + } + os << std::endl; + } +} + +} // namespace + +namespace cirops +{ + +void DumpOperators::run(std::ostream &os, const circle::Model *model, const DumpOption &option) +{ + mio::circle::Reader reader(model); + + const uint32_t subgraph_size = reader.num_subgraph(); + for (uint32_t g = 0; g < subgraph_size; g++) + { + reader.select_subgraph(g); + dump_ops(os, reader, option); + + if (!option.all_graphs) + break; + } +} + +} // namespace cirops diff --git a/compiler/circle-operator/src/Dump.h b/compiler/circle-operator/src/Dump.h new file mode 100644 index 000000000..aa1d1be49 --- /dev/null +++ b/compiler/circle-operator/src/Dump.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DUMP_H__ +#define __DUMP_H__ + +#include <mio/circle/schema_generated.h> + +#include <ostream> + +namespace cirops +{ + +struct DumpOption +{ + bool names = false; + bool codes = false; + bool all_graphs = false; +}; + +class DumpOperators +{ +public: + DumpOperators() = default; + +public: + void run(std::ostream &os, const circle::Model *model, const DumpOption &option); +}; + +} // namespace cirops + +#endif // __DUMP_H__ diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp index a1ace4f58..4b39a6ddb 100644 --- a/compiler/circle-opselector/driver/Driver.cpp +++ b/compiler/circle-opselector/driver/Driver.cpp @@ -159,26 +159,16 @@ int entry(int argc, char **argv) arser::Arser arser("circle-opselector provides selecting operations in circle model"); - arser.add_argument("--version") - .nargs(0) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); + arser::Helper::add_version(arser, print_version); // TODO Add new options! 
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); // select option - arser.add_argument("--by_id") - .nargs(1) - .type(arser::DataType::STR) - .help("Input operation id to select nodes."); - arser.add_argument("--by_name") - .nargs(1) - .type(arser::DataType::STR) - .help("Input operation name to select nodes."); + arser.add_argument("--by_id").help("Input operation id to select nodes."); + arser.add_argument("--by_name").help("Input operation name to select nodes."); try { diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt index 0657607d2..ffe1b8909 100644 --- a/compiler/circle-part-value-test/CMakeLists.txt +++ b/compiler/circle-part-value-test/CMakeLists.txt @@ -82,7 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1}) # Run partitioner add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON} - COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}" + COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file" + "${PARTITION_NAME}.circle" "--work_path" "${PARTITIONER_OUTPUT_PATH}" DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH} COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}" ) diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt index e29a66b41..7b26b3ba7 100644 --- a/compiler/circle-partitioner-test/CMakeLists.txt +++ b/compiler/circle-partitioner-test/CMakeLists.txt @@ -57,7 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1}) # Run partitioner set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json") add_custom_command(OUTPUT ${PART_CONN_JSON} - COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}" 
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file" + "${PART_NAME}.circle" "--work_path" "${PART_OUT_PATH}" DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH} COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}" ) diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt index 9b8f5afae..abc5d93fb 100644 --- a/compiler/circle-partitioner/CMakeLists.txt +++ b/compiler/circle-partitioner/CMakeLists.txt @@ -1,7 +1,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(circle-partitioner "${SOURCES}") -target_link_libraries(circle-partitioner foder) target_link_libraries(circle-partitioner crew) target_link_libraries(circle-partitioner safemain) target_link_libraries(circle-partitioner luci_lang) @@ -17,22 +16,3 @@ target_link_libraries(circle-partitioner vconone) target_link_libraries(circle-partitioner nncc_common) install(TARGETS circle-partitioner DESTINATION bin) - -# TODO remove circle_partitioner -add_executable(circle_partitioner "${SOURCES}") -target_link_libraries(circle_partitioner foder) -target_link_libraries(circle_partitioner crew) -target_link_libraries(circle_partitioner safemain) -target_link_libraries(circle_partitioner luci_lang) -target_link_libraries(circle_partitioner luci_log) -target_link_libraries(circle_partitioner luci_import) -target_link_libraries(circle_partitioner luci_service) -target_link_libraries(circle_partitioner luci_pass) -target_link_libraries(circle_partitioner luci_export) -target_link_libraries(circle_partitioner luci_partition) -target_link_libraries(circle_partitioner arser) -target_link_libraries(circle_partitioner pepper_csv2vec) -target_link_libraries(circle_partitioner vconone) -target_link_libraries(circle_partitioner nncc_common) - -install(TARGETS circle_partitioner DESTINATION bin) diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md index 2e0a98638..760cf28d1 100644 --- 
a/compiler/circle-partitioner/README.md +++ b/compiler/circle-partitioner/README.md @@ -4,10 +4,10 @@ _circle-partitioner_ provides model partitioning of circle model to two or more ## How circle-partitioner work -_circle-partitioner_ requires 3 positional arguments -- first: `partition` file -- second: `input` circle model file -- third: `work` folder +_circle-partitioner_ requires 3 arguments for inputs files +- `--part_file`: `partition` file, use extension `.part` +- `--input_file`: `input` circle model file +- `--work_path`: `work` path where input files reside. this is optional and CWD if omitted And options to override `partition` file as a helper to try out without editing `partition` file. - `--backends`: override `backends` of `[partition]` section @@ -20,7 +20,7 @@ are read from `work` folder. Outputs are (1) one or more partitioned circle models and (2) connection file that gives how the partitioned models should be connected to act like the source `input` model. -Why does input files be placed in `work` folder too? +Why does input files be placed in `work` path too? 
- this is still work in progress condition - use cases are still ambigious - original `input` model file can be used by the backend, so `.conn` file links it as `source` @@ -94,7 +94,8 @@ Net_InstanceNorm_003/ Command example ``` -./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003 +./circle-partitioner --part_file Net_InstanceNorm_003.part \ +--input_file Net_InstanceNorm_003.circle --work_path= Net_InstanceNorm_003 ``` Result of _circle-partitioner_ @@ -171,11 +172,11 @@ Consider partitioning with backends of OneRT Let's try with this command: ``` -circle_partitioner \ - --partition Net_InstanceNorm_003.part \ - --backends cpu,acl_cl \ - --default cpu \ - Net_InstanceNorm_003.circle Net_InstanceNorm_003 +circle-partitioner \ + --backends cpu,acl_cl --default cpu \ + --part_file Net_InstanceNorm_003.part \ + --input_file Net_InstanceNorm_003.circle \ + --work_path Net_InstanceNorm_003 ``` where `Net_InstanceNorm_003.part` is like this for initial design diff --git a/compiler/circle-partitioner/requires.cmake b/compiler/circle-partitioner/requires.cmake index 690d9531c..82d9c2b0f 100644 --- a/compiler/circle-partitioner/requires.cmake +++ b/compiler/circle-partitioner/requires.cmake @@ -1,4 +1,3 @@ -require("foder") require("crew") require("pepper-csv2vec") require("safemain") diff --git a/compiler/circle-partitioner/src/CirclePartitioner.cpp b/compiler/circle-partitioner/src/CirclePartitioner.cpp index 0151e92d3..5cecb9ae0 100644 --- a/compiler/circle-partitioner/src/CirclePartitioner.cpp +++ b/compiler/circle-partitioner/src/CirclePartitioner.cpp @@ -18,9 +18,7 @@ #include "PartitionExport.h" #include "HelperPath.h" -#include <foder/FileLoader.h> - -#include <luci/Importer.h> +#include <luci/ImporterEx.h> #include <luci/Service/Validate.h> #include <luci/CircleExporter.h> #include <luci/CircleFileExpContract.h> @@ -41,9 +39,9 @@ namespace const char *opt_bks = "--backends"; const char *opt_def = "--default"; -const char 
*opt_part = "partition"; -const char *opt_input = "input"; -const char *opt_work = "work"; +const char *opt_part_file = "--part_file"; +const char *opt_input_file = "--input_file"; +const char *opt_work_path = "--work_path"; void print_version(void) { @@ -53,63 +51,25 @@ void print_version(void) void build_arser(arser::Arser &arser) { - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument(opt_bks) - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Backends in CSV to use for partitioning"); - - arser.add_argument(opt_def) - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Default backend to assign"); - - arser.add_argument(opt_part) - .nargs(1) - .type(arser::DataType::STR) + arser::Helper::add_version(arser, print_version); + + arser.add_argument(opt_bks).help("Backends in CSV to use for partitioning"); + + arser.add_argument(opt_def).help("Default backend to assign"); + + arser.add_argument(opt_part_file) + .required(true) .help("Partition file which provides backend to assign"); - arser.add_argument(opt_input) - .nargs(1) - .type(arser::DataType::STR) - .help("Input circle model filename"); - arser.add_argument(opt_work) - .nargs(1) - .type(arser::DataType::STR) + arser.add_argument(opt_input_file).required(true).help("Input circle model filename"); + arser.add_argument(opt_work_path) .help("Work folder of partition, input files exist and output files are produced"); } std::unique_ptr<luci::Module> load_model(const std::string &input_path) { - // Load model from the file - foder::FileLoader file_loader{input_path}; - std::vector<char> model_data = file_loader.load(); - - // Verify flatbuffers - flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()}; - if (!circle::VerifyModelBuffer(verifier)) - { - std::cerr << "ERROR: Invalid input file '" << input_path 
<< "'" << std::endl; - return nullptr; - } - - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; - return nullptr; - } - // Import from input Circle file - luci::Importer importer; - return importer.importModule(circle_model); + luci::ImporterEx importerex; + return importerex.importVerifyModule(input_path); } } // namespace @@ -133,9 +93,14 @@ int entry(int argc, char **argv) return EXIT_FAILURE; } - std::string partition_file = arser.get<std::string>(opt_part); - std::string input_file = arser.get<std::string>(opt_input); - std::string work_folder = arser.get<std::string>(opt_work); + std::string partition_file = arser.get<std::string>(opt_part_file); + std::string input_file = arser.get<std::string>(opt_input_file); + std::string work_folder = "."; + + if (arser[opt_work_path]) + { + work_folder = arser.get<std::string>(opt_work_path); + } std::string partition_path = work_folder + "/" + partition_file; std::string input_path = work_folder + "/" + input_file; diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt index 5ec8b6ee5..a3a2902d9 100644 --- a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt +++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt @@ -18,7 +18,7 @@ unset(TEST_NAMES) get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR) set(options USE_QCONFIG) -set(oneValueArgs DTYPE GRANULARITY) +set(oneValueArgs DTYPE GRANULARITY INPUT_DTYPE OUTPUT_DTYPE) set(multiValueArgs "") macro(Add RECIPE) @@ -29,6 +29,16 @@ macro(Add RECIPE) set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json") endif() + set(INPUT_DTYPE_OPT "") + if(ARG_INPUT_DTYPE) + set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}") + endif() + + set(OUTPUT_DTYPE_OPT "") + if(ARG_OUTPUT_DTYPE) + set(OUTPUT_DTYPE_OPT 
"--output_type" "${ARG_OUTPUT_DTYPE}") + endif() + set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle") set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle") set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle") @@ -38,7 +48,10 @@ macro(Add RECIPE) add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH} COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH} COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH} - COMMAND $<TARGET_FILE:circle-quantizer> --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH} + COMMAND $<TARGET_FILE:circle-quantizer> + --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} + ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH} + ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT} DEPENDS circle-quantizer record-minmax diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst index 188103016..58f89c767 100644 --- a/compiler/circle-quantizer-dredd-recipe-test/test.lst +++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst @@ -6,10 +6,75 @@ ## TFLITE RECIPE +# MPQ Test (default: u8, target: s16) +Add(Quant_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_AveragePool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_BatchMatMul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Concatenation_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_003 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_DepthwiseConv2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_FullyConnected_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_LeakyRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Logistic_000 DTYPE 
uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_MaxPool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mean_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Neg_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Pad_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_PRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU6_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Reshape_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeBilinear_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeNearestNeighbor_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Slice_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Softmax_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Tanh_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Transpose_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_TransposeConv_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) + +# MPQ Test (default: s16, target: u8) +Add(Quant_Add_002 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_AveragePool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_BatchMatMul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Concatenation_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_004 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_DepthwiseConv2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_FullyConnected_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_LeakyRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Logistic_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_MaxPool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mean_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Mul_001 DTYPE int16 GRANULARITY channel 
USE_QCONFIG) +Add(Quant_Neg_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Pad_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_PRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ReLU6_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Reshape_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeBilinear_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_ResizeNearestNeighbor_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Slice_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Softmax_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Tanh_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_Transpose_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) +Add(Quant_TransposeConv_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) + Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG) +Add(Quant_Conv_000 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32) +Add(Quant_Conv_001 DTYPE uint8 GRANULARITY channel OUTPUT_DTYPE float32) +Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32) AddFakeQuant(Quant_Add_000) + +## CIRCLE RECIPE + +# MPQ Test (default: u8, target: s16) +Add(Quant_InstanceNorm_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG) + +# MPQ Test (default: s16, target: u8) +Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG) diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt index 14e00972b..16e41a327 100644 --- a/compiler/circle-quantizer/CMakeLists.txt +++ 
b/compiler/circle-quantizer/CMakeLists.txt @@ -10,7 +10,6 @@ add_executable(circle-quantizer "${SOURCES}") target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS}) target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB}) -target_link_libraries(circle-quantizer foder) target_link_libraries(circle-quantizer safemain) target_link_libraries(circle-quantizer oops) target_link_libraries(circle-quantizer loco) diff --git a/compiler/circle-quantizer/requires.cmake b/compiler/circle-quantizer/requires.cmake index c21e28e8d..4fcee1873 100644 --- a/compiler/circle-quantizer/requires.cmake +++ b/compiler/circle-quantizer/requires.cmake @@ -1,4 +1,3 @@ -require("foder") require("loco") require("locop") require("safemain") diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp index e0c85cb6e..f1e31ed8d 100644 --- a/compiler/circle-quantizer/src/CircleQuantizer.cpp +++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp @@ -14,9 +14,7 @@ * limitations under the License. 
*/ -#include <foder/FileLoader.h> - -#include <luci/Importer.h> +#include <luci/ImporterEx.h> #include <luci/CircleQuantizer.h> #include <luci/Service/Validate.h> #include <luci/CircleExporter.h> @@ -59,13 +57,31 @@ std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename std::vector<std::shared_ptr<LayerParam>> p; for (auto layer : layers) { - auto l = std::make_shared<LayerParam>(); + if (layer.isMember("name")) { - l->name = layer["name"].asString(); - l->dtype = layer["dtype"].asString(); - l->granularity = layer["granularity"].asString(); + auto l = std::make_shared<LayerParam>(); + { + l->name = layer["name"].asString(); + l->dtype = layer["dtype"].asString(); + l->granularity = layer["granularity"].asString(); + } + p.emplace_back(l); + } + + // Multiple names with the same dtype & granularity + if (layer.isMember("names")) + { + for (auto name : layer["names"]) + { + auto l = std::make_shared<LayerParam>(); + { + l->name = name.asString(); + l->dtype = layer["dtype"].asString(); + l->granularity = layer["granularity"].asString(); + } + p.emplace_back(l); + } } - p.emplace_back(l); } return p; @@ -109,23 +125,12 @@ int entry(int argc, char **argv) arser::Arser arser("circle-quantizer provides circle model quantization"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); arser.add_argument(qdqw) .nargs(3) .type(arser::DataType::STR_VEC) - .required(false) .help("Quantize-dequantize weight values required action before quantization. 
" "Three arguments required: input_model_dtype(float32) " "output_model_dtype(uint8) granularity(layer, channel)"); @@ -133,28 +138,24 @@ int entry(int argc, char **argv) arser.add_argument(qwmm) .nargs(3) .type(arser::DataType::STR_VEC) - .required(false) .help("Quantize with min/max values. " "Three arguments required: input_model_dtype(float32) " "output_model_dtype(uint8) granularity(layer, channel)"); arser.add_argument(tf_maxpool) .nargs(0) - .required(false) .default_value(false) .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can " "degrade accuracy of some models"); arser.add_argument(fake_quant) .nargs(0) - .required(false) .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will " "generate an fp32 model."); arser.add_argument(rq) .nargs(2) .type(arser::DataType::STR_VEC) - .required(false) .help("Requantize a quantized model. " "Two arguments required: input_model_dtype(int8) " "output_model_dtype(uint8)"); @@ -162,7 +163,6 @@ int entry(int argc, char **argv) arser.add_argument(fq) .nargs(3) .type(arser::DataType::STR_VEC) - .required(false) .accumulated(true) .help("Write quantization parameters to the specified tensor. " "Three arguments required: tensor_name(string), " @@ -171,32 +171,21 @@ int entry(int argc, char **argv) arser.add_argument(cq) .nargs(2) .type(arser::DataType::STR_VEC) - .required(false) .accumulated(true) .help("Copy quantization parameter from a tensor to another tensor." 
"Two arguments required: source_tensor_name(string), " "destination_tensor_name(string)"); arser.add_argument("--input_type") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Input type of quantized model (uint8 or int16)"); + .help("Input type of quantized model (uint8, int16, or float32)"); arser.add_argument("--output_type") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Output type of quantized model (uint8 or int16)"); + .help("Output type of quantized model (uint8, int16, or float32)"); - arser.add_argument(cfg) - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Path to the quantization configuration file"); + arser.add_argument(cfg).help("Path to the quantization configuration file"); - arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); arser.add_argument(gpd).nargs(0).required(false).default_value(false).help( "This will turn on profiling data generation."); @@ -384,27 +373,10 @@ int entry(int argc, char **argv) settings->set(luci::UserSettings::Key::ProfilingDataGen, true); // Load model from the file - foder::FileLoader file_loader{input_path}; - std::vector<char> model_data = file_loader.load(); - - // Verify flatbuffers - flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()}; - if (!circle::VerifyModelBuffer(verifier)) - { - std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; - return EXIT_FAILURE; - } - - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; + luci::ImporterEx importerex; + auto module = 
importerex.importVerifyModule(input_path); + if (module.get() == nullptr) return EXIT_FAILURE; - } - - // Import from input Circle file - luci::Importer importer; - auto module = importer.importModule(circle_model); for (size_t idx = 0; idx < module->size(); ++idx) { diff --git a/compiler/circle-tensordump/driver/Driver.cpp b/compiler/circle-tensordump/driver/Driver.cpp index 70f3c8d84..c32dc3f5a 100644 --- a/compiler/circle-tensordump/driver/Driver.cpp +++ b/compiler/circle-tensordump/driver/Driver.cpp @@ -31,11 +31,9 @@ int entry(int argc, char **argv) arser::Arser arser{ "circle-tensordump allows users to retrieve tensor information from a Circle model file"}; - arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump"); + arser.add_argument("circle").help("Circle file path to dump"); arser.add_argument("--tensors").nargs(0).help("Dump to console"); arser.add_argument("--tensors_to_hdf5") - .nargs(1) - .type(arser::DataType::STR) .help("Dump to hdf5 file. 
Specify hdf5 file path to be dumped"); try diff --git a/compiler/circle-tensordump/src/Dump.cpp b/compiler/circle-tensordump/src/Dump.cpp index e477a7417..49afa73df 100644 --- a/compiler/circle-tensordump/src/Dump.cpp +++ b/compiler/circle-tensordump/src/Dump.cpp @@ -15,7 +15,8 @@ */ #include "Dump.h" -#include "Reader.h" + +#include <mio_circle/Reader.h> #include <H5Cpp.h> @@ -102,7 +103,7 @@ namespace circletensordump void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &) { - circletensordump::Reader reader(model); + mio::circle::Reader reader(model); uint32_t num_subgraph = reader.num_subgraph(); auto buffers = reader.buffers(); @@ -296,7 +297,7 @@ void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model, const std::string &output_path) { // loads a circle model - circletensordump::Reader reader(model); + mio::circle::Reader reader(model); uint32_t num_subgraph = reader.num_subgraph(); // create a hdf5 file diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp deleted file mode 100644 index 47b876054..000000000 --- a/compiler/circle-tensordump/src/Reader.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Reader.h" - -#include <mio_circle/Helper.h> - -#include <sstream> -#include <string> - -namespace circletensordump -{ - -Reader::Reader(const circle::Model *model) -{ - _subgraphs = model->subgraphs(); - _buffers = model->buffers(); - - auto opcodes = model->operator_codes(); - for (const ::circle::OperatorCode *opcode : *opcodes) - { - _op_codes.push_back(opcode); - } -} - -size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) -{ - if (buff_data != nullptr) - { - *buff_data = nullptr; - } - - if (buf_idx == 0) - return 0; - - if (auto *buffer = (*_buffers)[buf_idx]) - { - if (auto *array = buffer->data()) - { - if (size_t size = array->size()) - { - if (buff_data != nullptr) - { - *buff_data = reinterpret_cast<const uint8_t *>(array->data()); - } - return size; - } - } - } - - return 0; -} - -circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - return mio::circle::builtin_code_neutral(opcode); -} - -std::string Reader::opcode_name(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - if (!mio::circle::is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid: " << index << ")"; - return oss.str(); - } - - return mio::circle::opcode_name(opcode); -} - -bool Reader::select_subgraph(uint32_t sgindex) -{ - _tensors = nullptr; - _operators = nullptr; - - _inputs.clear(); - _outputs.clear(); - - if (_subgraphs->Length() <= sgindex) - { - assert(false); - return false; - } - - const circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; - - _tensors = subgraph->tensors(); - _operators = subgraph->operators(); - - _inputs = as_index_vector(subgraph->inputs()); - _outputs = as_index_vector(subgraph->outputs()); - - return true; -} - -} // 
namespace circletensordump diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h deleted file mode 100644 index c868bc277..000000000 --- a/compiler/circle-tensordump/src/Reader.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CIRCLE_TENSORDUMP_READER_H__ -#define __CIRCLE_TENSORDUMP_READER_H__ - -#include <mio/circle/schema_generated.h> - -#include <map> -#include <string> -#include <vector> - -namespace circletensordump -{ - -template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array) -{ - std::vector<T> ret(flat_array->Length()); - for (uint32_t i = 0; i < flat_array->Length(); i++) - { - ret[i] = flat_array->Get(i); - } - return ret; -} - -/** - * @brief Loads Circle file and provides helpers to access attributes - */ -class Reader -{ -private: - using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>; - using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>; - using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>; - using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>; - -public: - Reader(const circle::Model *model); - - Reader() = delete; - -public: - const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; } - const 
CircleBuffers_t *buffers() { return _buffers; } - const CircleTensors_t *tensors() { return _tensors; } - const CircleOperators_t *operators() { return _operators; } - const std::vector<int32_t> &inputs() const { return _inputs; } - const std::vector<int32_t> &outputs() const { return _outputs; } - - uint32_t num_subgraph() const { return _subgraphs->Length(); } - - size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); - circle::BuiltinOperator builtin_code(const circle::Operator *op) const; - std::string opcode_name(const circle::Operator *op) const; - -public: - bool select_subgraph(uint32_t subgraph); - -private: - const CircleSubGraphs_t *_subgraphs{nullptr}; - const CircleBuffers_t *_buffers{nullptr}; - const CircleTensors_t *_tensors{nullptr}; - const CircleOperators_t *_operators{nullptr}; - - std::vector<const circle::OperatorCode *> _op_codes; - std::vector<int32_t> _inputs; - std::vector<int32_t> _outputs; -}; - -} // namespace circletensordump - -#endif // __CIRCLE_TENSORDUMP_READER_H__ diff --git a/compiler/circle-verify/src/Driver.cpp b/compiler/circle-verify/src/Driver.cpp index 7a44c65b9..c3a414701 100644 --- a/compiler/circle-verify/src/Driver.cpp +++ b/compiler/circle-verify/src/Driver.cpp @@ -25,7 +25,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to verify"); + arser.add_argument("circle").help("Circle file path to verify"); try { diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst index f41aac303..a6f2786d2 100644 --- a/compiler/circle2circle-dredd-recipe-test/test.lst +++ b/compiler/circle2circle-dredd-recipe-test/test.lst @@ -31,6 +31,8 @@ Add(Net_TConv_Add_002 PASS fuse_add_with_tconv) Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv) Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv) Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv) +Add(Net_TConv_BN_003 PASS 
fuse_batchnorm_with_tconv) +Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv) Add(Net_InstanceNorm_001 PASS fuse_instnorm) Add(Net_InstanceNorm_003 PASS fuse_instnorm) Add(Net_InstanceNorm_004 PASS fuse_instnorm) @@ -46,6 +48,7 @@ Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape) Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax) Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax) Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax) +Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul) ## CIRCLE RECIPE diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt index cd79967b7..dbe485b9f 100644 --- a/compiler/circle2circle/CMakeLists.txt +++ b/compiler/circle2circle/CMakeLists.txt @@ -4,7 +4,6 @@ list(REMOVE_ITEM SOURCES ${TESTS}) add_executable(circle2circle "${SOURCES}") target_include_directories(circle2circle PRIVATE src) -target_link_libraries(circle2circle foder) target_link_libraries(circle2circle nncc_common) target_link_libraries(circle2circle safemain) target_link_libraries(circle2circle oops) @@ -29,7 +28,6 @@ nnas_find_package(GTest REQUIRED) GTest_AddTest(circle2circle_test ${TESTS} ${SOURCES}) target_include_directories(circle2circle_test PRIVATE src) -target_link_libraries(circle2circle_test foder) target_link_libraries(circle2circle_test nncc_common) target_link_libraries(circle2circle_test oops) target_link_libraries(circle2circle_test hermes) diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake index b6c61198f..4e5ed0dd1 100644 --- a/compiler/circle2circle/requires.cmake +++ b/compiler/circle2circle/requires.cmake @@ -1,4 +1,3 @@ -require("foder") require("loco") require("locop") require("logo-core") diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index ae677a321..f5cf0d782 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ 
b/compiler/circle2circle/src/Circle2Circle.cpp @@ -14,9 +14,7 @@ * limitations under the License. */ -#include <foder/FileLoader.h> - -#include <luci/Importer.h> +#include <luci/ImporterEx.h> #include <luci/CircleOptimizer.h> #include <luci/Service/ChangeOutputs.h> #include <luci/Service/Validate.h> @@ -54,6 +52,11 @@ void csv_tokenize(const std::string &data, std::vector<std::string> &result) result.push_back(token); } +void add_switch(arser::Arser &arser, const char *opt, const char *desc) +{ + arser.add_argument(opt).nargs(0).default_value(false).help(desc); +} + int entry(int argc, char **argv) { // Simple argument parser (based on map) @@ -64,368 +67,125 @@ int entry(int argc, char **argv) arser::Arser arser("circle2circle provides circle model optimization and transformations"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); - - arser.add_argument("--O1").nargs(0).required(false).default_value(false).help( - "Enable O1 optimize options"); - - arser.add_argument("--fold_add_v2") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold AddV2 operators with constant inputs"); - - arser.add_argument("--fold_cast") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold Cast operators with constant input"); - - arser.add_argument("--fold_dequantize") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold dequantize op"); - - arser.add_argument("--fold_dwconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold Depthwise Convolution operator with constant inputs"); - - arser.add_argument("--fold_gather") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold Gather operator"); 
- - arser.add_argument("--fold_sparse_to_dense") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fold SparseToDense operator"); - - arser.add_argument("--forward_reshape_to_unaryop") - .nargs(0) - .required(false) - .default_value(false) - .help("This will move Reshape after UnaryOp for centain condition"); - - arser.add_argument("--fuse_activation_function") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Activation function to a preceding operator"); - - arser.add_argument("--fuse_add_with_fully_connected") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Add operator to FullyConnected operator"); - - arser.add_argument("--fuse_add_with_tconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Add operator to Transposed Convolution operator"); - - arser.add_argument("--fuse_batchnorm_with_conv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators to Convolution operator"); - - arser.add_argument("--fuse_batchnorm_with_dwconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators to Depthwise Convolution operator"); - - arser.add_argument("--fuse_batchnorm_with_tconv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators to Transposed Convolution operator"); - - arser.add_argument("--fuse_bcq") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse operators and apply Binary Coded Quantization"); - - arser.add_argument("--fuse_instnorm") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse operators to InstanceNorm operator"); - - arser.add_argument("--fuse_mean_with_mean") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse two Mean operations when they follow one by one." 
- "This will fold them into one operation and merge reduction indices."); - - arser.add_argument("--fuse_transpose_with_mean") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse Mean operation with a preceding Transpose under certain conditions."); - - arser.add_argument("--make_batchnorm_gamma_positive") - .nargs(0) - .required(false) - .default_value(false) - .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note " - "that this pass can change the execution result of the model. So, use it only when the " - "impact is known to be acceptable."); - - arser.add_argument("--fuse_preactivation_batchnorm") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse BatchNorm operators of pre-activations to Convolution operator"); - - arser.add_argument("--remove_fakequant") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove FakeQuant operators"); - - arser.add_argument("--remove_quantdequant") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove Quantize-Dequantize sequence"); - - arser.add_argument("--remove_redundant_quantize") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove redundant Quantize operators"); - - arser.add_argument("--remove_redundant_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse or remove subsequent Reshape operators"); - - arser.add_argument("--remove_redundant_transpose") - .nargs(0) - .required(false) - .default_value(false) - .help("This will fuse or remove subsequent Transpose operators"); - - arser.add_argument("--remove_unnecessary_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary reshape operators"); - - arser.add_argument("--remove_unnecessary_slice") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary slice operators"); - - 
arser.add_argument("--remove_unnecessary_strided_slice") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary strided slice operators"); - - arser.add_argument("--remove_unnecessary_split") - .nargs(0) - .required(false) - .default_value(false) - .help("This will remove unnecessary split operators"); - - arser.add_argument("--replace_cw_mul_add_with_depthwise_conv") - .nargs(0) - .required(false) - .default_value(false) - .help("This will replace channel-wise mul/add with DepthwiseConv2D operator"); - - arser.add_argument("--replace_sub_with_add") - .nargs(0) - .required(false) - .default_value(false) - .help("This will replace sub with add operator"); - - arser.add_argument("--resolve_customop_add") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(Add) to Add operator"); - - arser.add_argument("--resolve_customop_batchmatmul") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(BatchMatmul) to BatchMatmul operator"); - - arser.add_argument("--resolve_customop_matmul") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(Matmul) to Matmul operator"); - - arser.add_argument("--resolve_customop_max_pool_with_argmax") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators"); - - arser.add_argument("--shuffle_weight_to_16x1float32") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. 
Note that " - "it only converts weights whose row is a multiple of 16"); - - arser.add_argument("--substitute_pack_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert single input Pack to Reshape"); - - arser.add_argument("--substitute_padv2_to_pad") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition PadV2 to Pad"); - - arser.add_argument("--substitute_splitv_to_split") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition SplitV to Split operator"); - - arser.add_argument("--substitute_squeeze_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition Squeeze to Reshape"); - - arser.add_argument("--substitute_strided_slice_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert certain condition Strided_Slice to Reshape"); - - arser.add_argument("--substitute_transpose_to_reshape") - .nargs(0) - .required(false) - .default_value(false) - .help("This will convert single input Transpose to Reshape"); - - arser.add_argument("--expand_broadcast_const") - .nargs(0) - .required(false) - .default_value(false) - .help("This will expand broadcastable constant inputs"); - - arser.add_argument("--convert_nchw_to_nhwc") - .nargs(0) - .required(false) - .default_value(false) - .help("Experimental: This will convert NCHW operators to NHWC under the assumption that " - "input model is NCHW."); - - arser.add_argument("--nchw_to_nhwc_input_shape") - .nargs(0) - .required(false) - .default_value(false) - .help("Convert the input shape of the model (argument for --convert_nchw_to_nhwc)."); - - arser.add_argument("--nchw_to_nhwc_output_shape") - .nargs(0) - .required(false) - .default_value(false) - .help("Convert the output shape of the model (argument for --convert_nchw_to_nhwc)."); - - arser.add_argument("--transform_min_max_to_relu6") - .nargs(0) - 
.required(false) - .default_value(false) - .help("Transform Minimum(6)-Maximum(0) pattern to Relu6 operator"); - - arser.add_argument("--transform_min_relu_to_relu6") - .nargs(0) - .required(false) - .default_value(false) - .help("Transform Minimum(6)-Relu pattern to Relu6 operator"); - - arser.add_argument("--mute_warnings") - .nargs(0) - .required(false) - .default_value(false) - .help("This will turn off warning messages"); - - arser.add_argument("--disable_validation") - .nargs(0) - .required(false) - .default_value(false) - .help("This will turn off operator validations. May help input model investigation."); - - arser.add_argument("--generate_profile_data") - .nargs(0) - .required(false) - .default_value(false) - .help("This will turn on profiling data generation."); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); + + add_switch(arser, "--fold_add_v2", "This will fold AddV2 operators with constant inputs"); + add_switch(arser, "--fold_cast", "This will fold Cast operators with constant input"); + add_switch(arser, "--fold_densify", + "This will fold Densify operators with sparse constant input"); + add_switch(arser, "--fold_dequantize", "This will fold dequantize op"); + add_switch(arser, "--fold_dwconv", + "This will fold Depthwise Convolution operator with constant inputs"); + add_switch(arser, "--fold_gather", "This will fold Gather operator"); + add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator"); + add_switch(arser, "--forward_reshape_to_unaryop", + "This will move Reshape after UnaryOp for centain condition"); + add_switch(arser, "--fuse_activation_function", + "This will fuse Activation function to a preceding operator"); + add_switch(arser, "--fuse_add_with_fully_connected", + "This will fuse Add operator to FullyConnected operator"); + add_switch(arser, "--fuse_add_with_tconv", + "This will fuse Add operator to Transposed Convolution operator"); + add_switch(arser, 
"--fuse_batchnorm_with_conv", + "This will fuse BatchNorm operators to Convolution operator"); + add_switch(arser, "--fuse_batchnorm_with_dwconv", + "This will fuse BatchNorm operators to Depthwise Convolution operator"); + add_switch(arser, "--fuse_batchnorm_with_tconv", + "This will fuse BatchNorm operators to Transposed Convolution operator"); + add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization"); + add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator"); + add_switch(arser, "--fuse_mean_with_mean", + "This will fuse two Mean operations when they follow one by one. This will fold them " + "into one operation and merge reduction indices."); + add_switch(arser, "--fuse_transpose_with_mean", + "This will fuse Mean operation with a preceding Transpose under certain conditions."); + add_switch(arser, "--make_batchnorm_gamma_positive", + "This will make negative gamma of BatchNorm into a small positive value (1e-10). " + "Note that this pass can change the execution result of the model. 
So, use it only " + "when the impact is known to be acceptable."); + add_switch(arser, "--fuse_preactivation_batchnorm", + "This will fuse BatchNorm operators of pre-activations to Convolution operator"); + add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators"); + add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence"); + add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators"); + add_switch(arser, "--remove_redundant_reshape", + "This will fuse or remove subsequent Reshape operators"); + add_switch(arser, "--remove_redundant_transpose", + "This will fuse or remove subsequent Transpose operators"); + add_switch(arser, "--remove_unnecessary_reshape", + "This will remove unnecessary reshape operators"); + add_switch(arser, "--remove_unnecessary_slice", "This will remove unnecessary slice operators"); + add_switch(arser, "--remove_unnecessary_strided_slice", + "This will remove unnecessary strided slice operators"); + add_switch(arser, "--remove_unnecessary_split", "This will remove unnecessary split operators"); + add_switch(arser, "--replace_cw_mul_add_with_depthwise_conv", + "This will replace channel-wise mul/add with DepthwiseConv2D operator"); + add_switch(arser, "--replace_sub_with_add", "This will replace sub with add operator"); + add_switch(arser, "--resolve_customop_add", "This will convert Custom(Add) to Add operator"); + add_switch(arser, "--resolve_customop_batchmatmul", + "This will convert Custom(BatchMatmul) to BatchMatmul operator"); + add_switch(arser, "--resolve_customop_matmul", + "This will convert Custom(Matmul) to Matmul operator"); + add_switch(arser, "--resolve_customop_max_pool_with_argmax", + "This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators"); + add_switch(arser, "--resolve_customop_splitv", + "This will convert Custom(SplitV) to SplitV operator"); + add_switch(arser, "--shuffle_weight_to_16x1float32", + "This 
will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that " + "it only converts weights whose row is a multiple of 16"); + add_switch(arser, "--replace_non_const_fc_with_batch_matmul", + "Replace FullyConnected with BatchMatMul when its weight is non-constant"); + add_switch(arser, "--substitute_pack_to_reshape", + "This will convert single input Pack to Reshape"); + add_switch(arser, "--substitute_padv2_to_pad", + "This will convert certain condition PadV2 to Pad"); + add_switch(arser, "--substitute_splitv_to_split", + "This will convert certain condition SplitV to Split operator"); + add_switch(arser, "--substitute_squeeze_to_reshape", + "This will convert certain condition Squeeze to Reshape"); + add_switch(arser, "--substitute_strided_slice_to_reshape", + "This will convert certain condition Strided_Slice to Reshape"); + add_switch(arser, "--substitute_transpose_to_reshape", + "This will convert single input Transpose to Reshape"); + add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs"); + add_switch(arser, "--convert_nchw_to_nhwc", + "Experimental: This will convert NCHW operators to NHWC under the assumption that " + "input model is NCHW."); + add_switch(arser, "--nchw_to_nhwc_input_shape", + "Convert the input shape of the model (argument for --convert_nchw_to_nhwc)."); + add_switch(arser, "--nchw_to_nhwc_output_shape", + "Convert the output shape of the model (argument for --convert_nchw_to_nhwc)."); + add_switch(arser, "--transform_min_max_to_relu6", + "Transform Minimum(6)-Maximum(0) pattern to Relu6 operator"); + add_switch(arser, "--transform_min_relu_to_relu6", + "Transform Minimum(6)-Relu pattern to Relu6 operator"); + add_switch(arser, "--mute_warnings", "This will turn off warning messages"); + add_switch(arser, "--disable_validation", + "This will turn off operator validations. 
May help input model investigation."); + add_switch(arser, "--generate_profile_data", "This will turn on profiling data generation."); arser.add_argument("--change_outputs") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Experimental: Change first subgraph output nodes to CSV names"); - arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); - arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + arser.add_argument("input").help("Input circle model"); + arser.add_argument("output").help("Output circle model"); // sparsification argument - arser.add_argument("--sparsify_tensor") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Tensor name that you want to sparsify"); + arser.add_argument("--sparsify_tensor").help("Tensor name that you want to sparsify"); arser.add_argument("--sparsify_traversal_order") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("0,1,2,3") .help("Traversal order of dimensions. Default value: 0,1,2,3"); arser.add_argument("--sparsify_format") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("d,s") .help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default " "value: d,s"); - arser.add_argument("--sparsify_block_size") - .nargs(1) - .type(arser::DataType::STR) - .required(false) - .help("Size of each block dimension"); + arser.add_argument("--sparsify_block_size").help("Size of each block dimension"); arser.add_argument("--sparsify_block_map") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .default_value("0,1") .help("Map from block dimension to the original tensor dimension. Default value: 0,1"); @@ -446,20 +206,12 @@ int entry(int argc, char **argv) // If REPLACE is zero, it does not overwrite an existing value. 
setenv("LUCI_LOG", "100", 0); } - if (arser.get<bool>("--O1")) - { - options->enable(Algorithms::FuseBCQ); - options->enable(Algorithms::FuseInstanceNorm); - options->enable(Algorithms::ResolveCustomOpAdd); - options->enable(Algorithms::ResolveCustomOpBatchMatMul); - options->enable(Algorithms::ResolveCustomOpMatMul); - options->enable(Algorithms::RemoveRedundantTranspose); - options->enable(Algorithms::SubstitutePackToReshape); - } if (arser.get<bool>("--fold_add_v2")) options->enable(Algorithms::FoldAddV2); if (arser.get<bool>("--fold_cast")) options->enable(Algorithms::FoldCast); + if (arser.get<bool>("--fold_densify")) + options->enable(Algorithms::FoldDensify); if (arser.get<bool>("--fold_dequantize")) options->enable(Algorithms::FoldDequantize); if (arser.get<bool>("--fold_dwconv")) @@ -524,8 +276,12 @@ int entry(int argc, char **argv) options->enable(Algorithms::ResolveCustomOpMatMul); if (arser.get<bool>("--resolve_customop_max_pool_with_argmax")) options->enable(Algorithms::ResolveCustomOpMaxPoolWithArgmax); + if (arser.get<bool>("--resolve_customop_splitv")) + options->enable(Algorithms::ResolveCustomOpSplitV); if (arser.get<bool>("--shuffle_weight_to_16x1float32")) options->enable(Algorithms::ShuffleWeightTo16x1Float32); + if (arser.get<bool>("--replace_non_const_fc_with_batch_matmul")) + options->enable(Algorithms::ReplaceNonConstFCWithBatchMatMul); if (arser.get<bool>("--substitute_pack_to_reshape")) options->enable(Algorithms::SubstitutePackToReshape); if (arser.get<bool>("--substitute_padv2_to_pad")) @@ -595,37 +351,11 @@ int entry(int argc, char **argv) csv_tokenize(csv_nodes, new_outputs); } - // Load model from the file - foder::FileLoader file_loader{input_path}; - std::vector<char> model_data; - - try - { - model_data = file_loader.load(); - } - catch (const std::runtime_error &err) - { - std::cerr << err.what() << std::endl; - return EXIT_FAILURE; - } - - flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), 
model_data.size()}; - if (!circle::VerifyModelBuffer(verifier)) - { - std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; - return EXIT_FAILURE; - } - - const circle::Model *circle_model = circle::GetModel(model_data.data()); - if (circle_model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; - return EXIT_FAILURE; - } - // Import from input Circle file - luci::Importer importer; - auto module = importer.importModule(circle_model); + luci::ImporterEx importerex; + auto module = importerex.importVerifyModule(input_path); + if (module.get() == nullptr) + return EXIT_FAILURE; if (change_outputs) { diff --git a/compiler/circlechef/tools/file/Driver.cpp b/compiler/circlechef/tools/file/Driver.cpp index 76d0f3f7f..9c4256b40 100644 --- a/compiler/circlechef/tools/file/Driver.cpp +++ b/compiler/circlechef/tools/file/Driver.cpp @@ -28,10 +28,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("recipe") - .type(arser::DataType::STR) - .help("Source recipe file path to convert"); - arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path"); + arser.add_argument("recipe").help("Source recipe file path to convert"); + arser.add_argument("circle").help("Target circle file path"); try { diff --git a/compiler/circlechef/tools/reverse/Driver.cpp b/compiler/circlechef/tools/reverse/Driver.cpp index 639e0af6f..c8ef07c6f 100644 --- a/compiler/circlechef/tools/reverse/Driver.cpp +++ b/compiler/circlechef/tools/reverse/Driver.cpp @@ -25,10 +25,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("circle") - .type(arser::DataType::STR) - .help("Source circle file path to convert"); - arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path"); + arser.add_argument("circle").help("Source circle file path to convert"); + arser.add_argument("recipe").help("Target recipe file path"); try { diff --git 
a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt index b65c06677..7485ff8e7 100644 --- a/compiler/circledump/CMakeLists.txt +++ b/compiler/circledump/CMakeLists.txt @@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(circledump ${DRIVER} ${SOURCES}) target_include_directories(circledump PRIVATE include) target_link_libraries(circledump arser) +target_link_libraries(circledump foder) target_link_libraries(circledump mio_circle04) target_link_libraries(circledump mio_circle04_helper) target_link_libraries(circledump safemain) diff --git a/compiler/circledump/driver/Driver.cpp b/compiler/circledump/driver/Driver.cpp index 657f24fe0..5b0871a91 100644 --- a/compiler/circledump/driver/Driver.cpp +++ b/compiler/circledump/driver/Driver.cpp @@ -15,7 +15,7 @@ */ #include <arser/arser.h> -#include <circleread/Model.h> +#include <foder/FileLoader.h> #include <circledump/Dump.h> #include <iostream> @@ -23,7 +23,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to dump"); + arser.add_argument("circle").help("Circle file path to dump"); try { @@ -38,14 +38,10 @@ int entry(int argc, char **argv) std::string circle_path = arser.get<std::string>("circle"); // Load Circle model from a circle file - std::unique_ptr<circleread::Model> model = circleread::load_circle(circle_path); - if (model == nullptr) - { - std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl; - return 255; - } - - const circle::Model *circlemodel = model->model(); + foder::FileLoader fileLoader{circle_path}; + std::vector<char> modelData = fileLoader.load(); + const circle::Model *circlemodel = circle::GetModel(modelData.data()); + // const circle::Model *circlemodel = model->model(); if (circlemodel == nullptr) { std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl; diff --git a/compiler/circledump/requires.cmake 
b/compiler/circledump/requires.cmake index 362d67cf4..183dfe227 100644 --- a/compiler/circledump/requires.cmake +++ b/compiler/circledump/requires.cmake @@ -1,3 +1,4 @@ require("arser") +require("foder") require("mio-circle04") require("safemain") diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp index 0b256dda8..69427a20e 100644 --- a/compiler/circledump/src/Dump.cpp +++ b/compiler/circledump/src/Dump.cpp @@ -16,8 +16,8 @@ #include <circledump/Dump.h> #include <mio_circle/Helper.h> +#include <mio_circle/Reader.h> -#include "Read.h" #include "OpPrinter.h" #include "MetadataPrinter.h" @@ -122,7 +122,7 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect) return os; } -void dump_sub_graph(std::ostream &os, circleread::Reader &reader) +void dump_sub_graph(std::ostream &os, mio::circle::Reader &reader) { auto tensors = reader.tensors(); auto operators = reader.operators(); @@ -150,14 +150,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) std::vector<int32_t> dims = {-1}; if (tensor->shape()) - dims = circleread::as_index_vector(tensor->shape()); + dims = mio::circle::as_index_vector(tensor->shape()); os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor) << " "; os << "(" << dims << ") "; if (tensor->shape_signature()) { - std::vector<int32_t> dims_sig = circleread::as_index_vector(tensor->shape_signature()); + std::vector<int32_t> dims_sig = mio::circle::as_index_vector(tensor->shape_signature()); os << "(" << dims_sig << ") "; } os << "B(" << tensor->buffer() << ") "; @@ -299,8 +299,8 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) const auto op = operators->Get(i); circle::BuiltinOperator builtincode = reader.builtin_code(op); - const std::vector<int32_t> &inputs = circleread::as_index_vector(op->inputs()); - const std::vector<int32_t> &outputs = circleread::as_index_vector(op->outputs()); + const std::vector<int32_t> &inputs 
= mio::circle::as_index_vector(op->inputs()); + const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs()); auto op_name = reader.opcode_name(op); os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " "; @@ -356,7 +356,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader) void dump_model(std::ostream &os, const circle::Model *model) { - circleread::Reader reader(model); + mio::circle::Reader reader(model); uint32_t num_subgraph = reader.num_subgraph(); diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp deleted file mode 100644 index 67e7fa5a6..000000000 --- a/compiler/circledump/src/Load.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <circleread/Model.h> - -#include <fcntl.h> -#include <unistd.h> -#include <sys/stat.h> -#include <sys/mman.h> - -namespace -{ - -class MemoryMappedModel final : public circleread::Model -{ -public: - /** - * @require fd and data SHOULD be valid - */ - explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size} - { - // DO NOTHING - } - -public: - ~MemoryMappedModel() - { - munmap(_data, _size); - close(_fd); - } - -public: - MemoryMappedModel(const MemoryMappedModel &) = delete; - MemoryMappedModel(MemoryMappedModel &&) = delete; - -public: - const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); } - -private: - int _fd = -1; - void *_data = nullptr; - size_t _size = 0; -}; - -class FileDescriptor final -{ -public: - FileDescriptor(int value) : _value{value} - { - // DO NOTHING - } - -public: - // NOTE Copy is not allowed - FileDescriptor(const FileDescriptor &) = delete; - -public: - // NOTE Move is allowed - FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); } - -public: - ~FileDescriptor() - { - if (_value != -1) - { - // Close on destructor - close(_value); - } - } - -public: - int value(void) const { return _value; } - -public: - int release(void) - { - auto res = _value; - _value = -1; - return res; - } - -private: - int _value = -1; -}; - -} // namespace - -namespace circleread -{ - -std::unique_ptr<Model> load_circle(const std::string &path) -{ - FileDescriptor fd = open(path.c_str(), O_RDONLY); - - if (fd.value() == -1) - { - // Return nullptr on open failure - return nullptr; - } - - struct stat st; - if (fstat(fd.value(), &st) == -1) - { - // Return nullptr on fstat failure - return nullptr; - } - - auto size = st.st_size; - auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0); - - if (data == MAP_FAILED) - { - // Return nullptr on mmap failure - return nullptr; - } - - return std::unique_ptr<circleread::Model>{new 
MemoryMappedModel(fd.release(), data, size)}; -} - -} // namespace circleread diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp index 02e5c26b5..817371dcf 100644 --- a/compiler/circledump/src/OpPrinter.cpp +++ b/compiler/circledump/src/OpPrinter.cpp @@ -15,7 +15,8 @@ */ #include "OpPrinter.h" -#include "Read.h" + +#include <mio_circle/Helper.h> #include <memory> @@ -233,7 +234,7 @@ public: { if (auto *reshape_params = op->builtin_options_as_ReshapeOptions()) { - auto new_shape = circleread::as_index_vector(reshape_params->new_shape()); + auto new_shape = mio::circle::as_index_vector(reshape_params->new_shape()); os << " "; os << "NewShape(" << new_shape << ")"; os << std::endl; @@ -802,6 +803,7 @@ OpPrinterRegistry::OpPrinterRegistry() // There is no Option for CEIL _op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>(); _op_map[circle::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>(); + // There is no Option for DENSIFY _op_map[circle::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>(); _op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>(); // There is no Option for DEQUANTIZE diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp deleted file mode 100644 index 3a7e98cde..000000000 --- a/compiler/circledump/src/Read.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Read.h" - -#include <mio_circle/Helper.h> - -#include <sstream> -#include <string> - -namespace circleread -{ - -Reader::Reader(const circle::Model *model) -{ - _version = model->version(); - _subgraphs = model->subgraphs(); - _buffers = model->buffers(); - _metadata = model->metadata(); - _signature_defs = model->signature_defs(); - - auto opcodes = model->operator_codes(); - for (const ::circle::OperatorCode *opcode : *opcodes) - { - _op_codes.push_back(opcode); - } -} - -size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) -{ - *buff_data = nullptr; - - if (buf_idx == 0) - return 0; - - if (auto *buffer = (*_buffers)[buf_idx]) - { - if (auto *array = buffer->data()) - { - if (size_t size = array->size()) - { - *buff_data = reinterpret_cast<const uint8_t *>(array->data()); - return size; - } - } - } - - return 0; -} - -circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - return opcode->builtin_code(); -} - -std::string Reader::opcode_name(const circle::Operator *op) const -{ - uint32_t index = op->opcode_index(); - assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); - - if (!mio::circle::is_valid(opcode)) - { - std::ostringstream oss; - oss << "(invalid: " << index << ")"; - return oss.str(); - } - - return mio::circle::opcode_name(opcode); -} - -bool Reader::select_subgraph(uint32_t sgindex) -{ - _subgraph_index = sgindex; - _tensors = nullptr; - _operators = nullptr; - - _inputs.clear(); - _outputs.clear(); - - if (_subgraphs->Length() <= sgindex) - { - assert(false); - return false; - } - - const circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; - - auto name = subgraph->name(); - _subgraph_name = name ? 
name->c_str() : "(noname)"; - - _tensors = subgraph->tensors(); - _operators = subgraph->operators(); - _data_format = subgraph->data_format(); - - _inputs = as_index_vector(subgraph->inputs()); - _outputs = as_index_vector(subgraph->outputs()); - - return true; -} - -} // namespace circleread diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt index 0fb99ddba..4ab0ea218 100644 --- a/compiler/cli/CMakeLists.txt +++ b/compiler/cli/CMakeLists.txt @@ -10,5 +10,5 @@ endif(NOT ENABLE_TEST) nnas_find_package(GTest QUIET) -GTest_AddTEst(cli_test ${TESTS}) +GTest_AddTest(cli_test ${TESTS}) target_link_libraries(cli_test cli) diff --git a/compiler/coco/core/src/IR/Module.cpp b/compiler/coco/core/src/IR/Module.cpp index 420cf6f0c..0db78941c 100644 --- a/compiler/coco/core/src/IR/Module.cpp +++ b/compiler/coco/core/src/IR/Module.cpp @@ -144,7 +144,7 @@ std::unique_ptr<Module> Module::create(void) m->_input = make_unique<coco::InputList>(); m->_output = make_unique<coco::OutputList>(); - return std::move(m); + return m; } } // namespace coco diff --git a/compiler/coco/generic/src/IR/Data.cpp b/compiler/coco/generic/src/IR/Data.cpp index 5ab7069ee..361dcc243 100644 --- a/compiler/coco/generic/src/IR/Data.cpp +++ b/compiler/coco/generic/src/IR/Data.cpp @@ -209,8 +209,7 @@ std::unique_ptr<Data> Data::create(void) data->_blob = std::move(blob); data->_fp32 = std::move(fp32); - // GCC 4.9 tries to copy data (while GCC 6.X doesn't) - return std::move(data); + return data; } } // namespace coco diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt index 404149c15..34a3a4d7d 100644 --- a/compiler/common-artifacts/CMakeLists.txt +++ b/compiler/common-artifacts/CMakeLists.txt @@ -12,14 +12,6 @@ if(${PYTHON_VERSION_MINOR} LESS 8) return() endif() -# Create python virtual environment with tensorflow 2.6.0 -set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0") - -add_custom_command( - OUTPUT 
${VIRTUALENV_OVERLAY_TF_2_6_0} - COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0} -) - # Create python virtual environment with tensorflow 2.8.0 set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0") @@ -30,33 +22,36 @@ add_custom_command( # Create requirements.txt and install required pip packages set(REQUIREMENTS_FILE "requirements.txt") -set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}") set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}") -add_custom_command( - OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools - COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade - DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} -) +set(PYTHON_OVERLAY python3) +if(PYTHON_EXECUTABLE MATCHES python3.8) + set(PYTHON_OVERLAY python3.8) +endif() +# NOTE when using behind proxy with self signed certificate, need to set '--trusted-host' options +set(PIP_OPTION_TRUSTED_HOST ) +if(DEFINED ENV{ONE_PIP_OPTION_TRUST_HOST}) + set(PIP_OPTION_TRUSTED_HOST --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --trusted-host pypi.org) +endif() + +# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051 +# TODO remove protobuf==3.20.1 when issue is resolved add_custom_command( OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} 
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} - COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools - COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade + COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.20.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} + COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000 + ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools + COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000 + ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0} ) add_custom_target(common_artifacts_python_deps ALL - DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} - ${VIRTUALENV_OVERLAY_TF_2_8_0} - ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0} ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} ) @@ -246,7 +241,13 @@ foreach(RECIPE IN ITEMS ${RECIPES}) if(NOT DEFINED NO_OPTIMIZE_${RECIPE}) # Generate optimized .circle add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH} - COMMAND $<TARGET_FILE:circle2circle> --O1 ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH} + # NOTE --resolve_customop_add is just to added for old -O1, no particular meaning + # --fold_dequantize is added to fold Tensor(FLOAT16) + DEQUANTIZE (Net_Dequantize_Add) + # model. FLOAT16 in general is NOT supported but only Tensor(FLOAT16) + DEQUANTIZE + # sequence accepted as folded to Tensor(FLOAT32). 
+ # TODO revise giving options from the list file + COMMAND $<TARGET_FILE:circle2circle> --resolve_customop_add --fold_dequantize --fold_densify + ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH} DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH} COMMENT "Generate ${OPT_CIRCLE_FILE}" ) diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst index 92b07fde8..2275a42d9 100644 --- a/compiler/common-artifacts/exclude.lst +++ b/compiler/common-artifacts/exclude.lst @@ -32,6 +32,7 @@ tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator tcgenerate(Ceil_000) tcgenerate(Conv2D_003) # runtime doesn't support dilation tcgenerate(Cos_000) +tcgenerate(Densify_000) # luci-interpreter doesn't support tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet @@ -67,6 +68,8 @@ tcgenerate(Neg_000) tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet tcgenerate(Net_Dangle_001) +tcgenerate(Net_Densify_Add_000) # luci-interpreter doesn't support Densify yet +tcgenerate(Net_Densify_Dequantize_Add_000) # luci-interpreter doesn't support Densify/Dequantize yet tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim tcgenerate(OneHot_000) diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp index 33cecbbe2..7481050c5 100644 --- a/compiler/common-artifacts/src/TestDataGenerator.cpp +++ b/compiler/common-artifacts/src/TestDataGenerator.cpp @@ -142,23 +142,15 @@ void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t int entry(int 
argc, char **argv) { arser::Arser arser; - arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test"); - arser.add_argument("--input_data") - .required(true) - .nargs(1) - .type(arser::DataType::STR) - .help("Path to generate input data h5 file"); + arser.add_argument("circle").help("Circle file you want to test"); + arser.add_argument("--input_data").required(true).help("Path to generate input data h5 file"); arser.add_argument("--expected_data") .required(true) - .nargs(1) - .type(arser::DataType::STR) .help("Path to generate expected data h5 file"); arser.add_argument("--fixed_seed") - .required(false) .nargs(0) .help("Put a fixed seed into the random number generator"); arser.add_argument("--input_range") - .required(false) .nargs(3) .type(arser::DataType::STR_VEC) .help("Set random number range [min max] for the input as 'name min max'"); diff --git a/compiler/crew/CMakeLists.txt b/compiler/crew/CMakeLists.txt index 1824d86ab..45cda7562 100644 --- a/compiler/crew/CMakeLists.txt +++ b/compiler/crew/CMakeLists.txt @@ -12,9 +12,12 @@ if(NOT ENABLE_TEST) return() endif(NOT ENABLE_TEST) +configure_file("src/test_read_semicolon.ini" "test_read_semicolon.ini" COPYONLY) + nnas_find_package(GTest REQUIRED) GTest_AddTest(crew_test ${TESTS}) target_include_directories(crew_test PRIVATE src) target_link_libraries(crew_test nncc_common) target_link_libraries(crew_test crew) +target_link_libraries(crew_test foder) diff --git a/compiler/crew/src/PConfigIni.cpp b/compiler/crew/src/PConfigIni.cpp index f0e3e8e01..5177843bf 100644 --- a/compiler/crew/src/PConfigIni.cpp +++ b/compiler/crew/src/PConfigIni.cpp @@ -26,10 +26,36 @@ #include <sstream> #include <stdexcept> #include <string> +#include <vector> namespace crew { +namespace +{ + +std::string filter_escape(const std::string &source) +{ + std::string key = source; + + // if key is surrounded with quotation + // TODO for quotation + + // if key has '\\' + ';', remove '\\' + auto pos = 
key.find("\\;"); + while (pos != std::string::npos) + { + auto k1 = key.substr(0, pos); + auto k2 = key.substr(pos + 1); + key = k1 + k2; + pos = key.find("\\;"); + } + + return key; +} + +} // namespace + Sections read_ini(const char *data, size_t length) { assert(data != nullptr); @@ -84,6 +110,7 @@ Sections read_ini(const char *data, size_t length) { auto key = string_line.substr(0, pos); auto val = string_line.substr(pos + 1); + key = filter_escape(key); section.items.emplace(key, val); } } @@ -107,11 +134,53 @@ Sections read_ini(const std::string &path) return read_ini(ini_data.data(), ini_data.size()); } +namespace +{ + +void replace(std::string &source, const std::string &token, const std::string &replace) +{ + size_t pos = 0; + while ((pos = source.find(token, pos)) != std::string::npos) + { + source.replace(pos, token.length(), replace); + pos += replace.length(); // Handles the case where 'replace' is a substring of 'token' + } +} + +Sections insert_escape(const Sections &inputs) +{ + Sections sections; + + // for all section in sections; + // if key has ';' then replace with '\;' + for (auto &input : inputs) + { + Section section; + section.name = input.name; + + for (auto &item : input.items) + { + auto key = item.first; + auto value = item.second; + + replace(key, ";", "\\;"); + section.items[key] = value; + } + sections.push_back(section); + } + + return sections; +} + +} // namespace + void write_ini(std::ostream &os, const Sections §ions) { std::stringstream ss; - ss << sections; + auto processed = insert_escape(sections); + + ss << processed; std::string strss = ss.str(); diff --git a/compiler/crew/src/PConfigIni.test.cpp b/compiler/crew/src/PConfigIni.test.cpp index bdd2ccc1f..c062c6937 100644 --- a/compiler/crew/src/PConfigIni.test.cpp +++ b/compiler/crew/src/PConfigIni.test.cpp @@ -17,12 +17,14 @@ #include "crew/PConfigIni.h" #include "crew/PConfigIniDump.h" +#include <foder/FileLoader.h> + #include <gtest/gtest.h> #include <sstream> #include 
<stdexcept> -TEST(ConfigIniTest, read_ini_non_exist_file) +TEST(ConfigIniTest, read_ini_non_exist_file_NEG) { EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error); } @@ -85,3 +87,60 @@ TEST(ConfigIniTest, write_ini_file_error_NEG) crew::Sections sections; EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error); } + +TEST(ConfigIniTest, read_file_escape_semicolon) +{ + auto sections = crew::read_ini("test_read_semicolon.ini"); + ASSERT_EQ(1UL, sections.size()); + + auto its = sections.begin(); + ASSERT_NE(sections.end(), its); + EXPECT_TRUE("hello" == its->name); + ASSERT_EQ(1UL, its->items.size()); + + auto it = its->items.begin(); + ASSERT_NE(its->items.end(), it); + + EXPECT_TRUE("keya;keyb;keyc;keyd" == it->first); + EXPECT_TRUE("world" == it->second); +} + +TEST(ConfigIniTest, write_file_escape_semicolon) +{ + std::string path("test_write_semicolon.ini"); + + // save key with ';' + { + crew::Sections sections; + crew::Section hello; + hello.name = "hello"; + hello.items["keya;keyb;keyc;keyd"] = "world"; + sections.push_back(hello); + crew::write_ini(path, sections); + } + + // load the file and check if there is '\\' + std::string strbuffer; + { + foder::FileLoader file_loader{path}; + auto ini_data = file_loader.load(); + + auto buffer = std::vector<char>(); + auto length = ini_data.size(); + buffer.reserve(length + 1); + + char *pbuffer = buffer.data(); + memcpy(pbuffer, ini_data.data(), length); + *(pbuffer + length) = 0; + + strbuffer = pbuffer; + } + int32_t count = 0; + size_t pos = 0; + while ((pos = strbuffer.find("\\;", pos)) != std::string::npos) + { + count++; + pos++; + } + EXPECT_TRUE(count == 3); +} diff --git a/compiler/crew/src/test_read_semicolon.ini b/compiler/crew/src/test_read_semicolon.ini new file mode 100644 index 000000000..d966fb707 --- /dev/null +++ b/compiler/crew/src/test_read_semicolon.ini @@ -0,0 +1,2 @@ +[hello] +keya\;keyb\;keyc\;keyd=world diff --git 
a/compiler/enco/core/src/CppGen/Host.cpp b/compiler/enco/core/src/CppGen/Host.cpp index 7f9456239..63baf0b31 100644 --- a/compiler/enco/core/src/CppGen/Host.cpp +++ b/compiler/enco/core/src/CppGen/Host.cpp @@ -299,7 +299,7 @@ std::unique_ptr<pp::MultiLineText> HostBlockCompiler::compile(const coco::Block res->append(ins->accept(prn)); } - return std::move(res); + return res; } } // namespace enco diff --git a/compiler/enco/core/src/CppGen/Subnet.cpp b/compiler/enco/core/src/CppGen/Subnet.cpp index 599b0794e..3fc14edf5 100644 --- a/compiler/enco/core/src/CppGen/Subnet.cpp +++ b/compiler/enco/core/src/CppGen/Subnet.cpp @@ -373,7 +373,7 @@ std::unique_ptr<SubnetStruct> SubnetStructBuilder::build(const ANNBinder *binder // Finalize compilation res->ctor()->append("ANeuralNetworksCompilation_finish(", cname, ");"); - return std::move(res); + return res; } std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder *binder) const @@ -415,7 +415,7 @@ std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder res->append("ANeuralNetworksExecution_free(execution);"); - return std::move(res); + return res; } } // namespace enco diff --git a/compiler/enco/core/src/Transforms/Split.cpp b/compiler/enco/core/src/Transforms/Split.cpp index 714c27a72..4bb21b0a7 100644 --- a/compiler/enco/core/src/Transforms/Split.cpp +++ b/compiler/enco/core/src/Transforms/Split.cpp @@ -656,7 +656,7 @@ public: app->ofm(ofm); app->ker(ker); - return std::move(app); + return app; } else { @@ -676,7 +676,7 @@ public: app->ofm(ofm); app->ker(ker); - return std::move(app); + return app; } } } @@ -704,7 +704,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asMul()) @@ -731,7 +731,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asPadF()) @@ -754,7 +754,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + 
return app; } } else if (auto maxpool = eval->op()->asMaxPool2D()) @@ -779,7 +779,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto avgpool = eval->op()->asAvgPool2D()) @@ -808,7 +808,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } } @@ -831,7 +831,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto relu6 = eval->op()->asReLU6()) @@ -853,7 +853,7 @@ public: app->ifm(ifm); app->ofm(ofm); - return std::move(app); + return app; } } else if (auto op = eval->op()->asConcatF()) @@ -880,7 +880,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asSub()) @@ -907,7 +907,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } else if (auto op = eval->op()->asDiv()) @@ -934,7 +934,7 @@ public: app->right(right); app->out(out); - return std::move(app); + return app; } } @@ -967,7 +967,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins) app->left(depth_concat->fst()->asFeature()); app->right(depth_concat->snd()->asFeature()); - return std::move(app); + return app; } // Build ANN IR from ANNConv2D instruction @@ -986,7 +986,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins) app->ker(conv2d->ker()->asKernel()); app->bias(coco::safe_cast<coco::FeatureObject>(conv2d->bias())); - return std::move(app); + return app; } return nullptr; diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp index aa2cad705..32ad44385 100644 --- a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp +++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp @@ -25,6 +25,8 @@ #include <loco/Service/TypeInference.h> #include <loco/Service/ShapeInference.h> +#include <limits> + namespace exo { diff --git a/compiler/kuma/src/IntervalSet.h 
b/compiler/kuma/src/IntervalSet.h index 3b6c5f666..1e26581c0 100644 --- a/compiler/kuma/src/IntervalSet.h +++ b/compiler/kuma/src/IntervalSet.h @@ -17,6 +17,7 @@ #ifndef __KUMA_DETAILS_LIVE_INTERVAL_SET_H__ #define __KUMA_DETAILS_LIVE_INTERVAL_SET_H__ +#include <cstdint> #include <map> namespace kuma diff --git a/compiler/loco/include/loco/IR/DataTypeTraits.h b/compiler/loco/include/loco/IR/DataTypeTraits.h index 1f78c9fec..6be46c3b3 100644 --- a/compiler/loco/include/loco/IR/DataTypeTraits.h +++ b/compiler/loco/include/loco/IR/DataTypeTraits.h @@ -83,6 +83,13 @@ template <> struct DataTypeImpl<DataType::U64> using Type = uint64_t; }; +template <> struct DataTypeImpl<DataType::FLOAT16> +{ + // float16 type with 16bit value, encoded with help of FP16 library + // https://github.com/Maratyszcza/FP16/ + using Type = uint16_t; +}; + template <> struct DataTypeImpl<DataType::FLOAT32> { // Use C++ float type for IEEE 32-bit floating-point numbers @@ -132,6 +139,8 @@ inline uint32_t size(DataType data_type) return sizeof(DataTypeImpl<DataType::S64>::Type); case DataType::U64: return sizeof(DataTypeImpl<DataType::U64>::Type); + case DataType::FLOAT16: + return sizeof(DataTypeImpl<DataType::FLOAT16>::Type); case DataType::FLOAT32: return sizeof(DataTypeImpl<DataType::FLOAT32>::Type); case DataType::FLOAT64: diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp index 500f08623..40ddb133b 100644 --- a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp +++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp @@ -122,9 +122,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) { using namespace loco; - auto encoder = encode_node->encoder(); - assert(encoder != nullptr); - auto decode_node = dynamic_cast<loco::FeatureDecode *>(encode_node->input()); if (decode_node == nullptr) { @@ -132,6 +129,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) } assert(decode_node->input() 
!= nullptr); + auto encoder = encode_node->encoder(); + assert(encoder != nullptr); + auto decoder = decode_node->decoder(); assert(decoder != nullptr); @@ -302,9 +302,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) { using namespace loco; - auto encoder = encode_node->encoder(); - assert(encoder != nullptr); - auto decode_node = dynamic_cast<loco::MatrixDecode *>(encode_node->input()); if (decode_node == nullptr) { @@ -312,6 +309,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g) } assert(decode_node->input() != nullptr); + auto encoder = encode_node->encoder(); + assert(encoder != nullptr); + auto decoder = decode_node->decoder(); assert(decoder != nullptr); diff --git a/compiler/luci-eval-driver/src/EvalDriver.cpp b/compiler/luci-eval-driver/src/EvalDriver.cpp index 4762cffe7..0ed35431d 100644 --- a/compiler/luci-eval-driver/src/EvalDriver.cpp +++ b/compiler/luci-eval-driver/src/EvalDriver.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include <luci/Importer.h> +#include <luci/ImporterEx.h> #include <luci_interpreter/Interpreter.h> #include <luci/CircleExporter.h> #include <luci/CircleFileExpContract.h> @@ -47,18 +47,6 @@ void writeDataToFile(const std::string &filename, const char *data, size_t data_ } } -std::unique_ptr<luci::Module> importModel(const std::string &filename) -{ - std::ifstream fs(filename, std::ifstream::binary); - if (fs.fail()) - { - throw std::runtime_error("Cannot open model file \"" + filename + "\".\n"); - } - std::vector<char> model_data((std::istreambuf_iterator<char>(fs)), - std::istreambuf_iterator<char>()); - return luci::Importer().importModule(circle::GetModel(model_data.data())); -} - template <typename NodeT> size_t getTensorSize(const NodeT *node) { uint32_t tensor_size = loco::size(node->dtype()); @@ -91,7 +79,8 @@ int entry(int argc, char **argv) const char *output_file = argv[4]; // Load model from the file - std::unique_ptr<luci::Module> module = importModel(filename); + luci::ImporterEx 
importer; + std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename); if (module == nullptr) { std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl; diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst index d134a6b95..f0df58db3 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -12,6 +12,7 @@ REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape) REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) REGISTER_KERNEL(Softmax) REGISTER_KERNEL(SpaceToBatchND) REGISTER_KERNEL(SpaceToDepth) diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h index 15ff0327b..efa6b167e 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h +++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h @@ -18,7 +18,7 @@ #define LUCI_INTERPRETER_PAL_DEQUANTIZE_H #include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h index 6046789ae..effb85d54 100644 --- a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h +++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h @@ -17,7 +17,7 @@ #ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H #define LUCI_INTERPRETER_PAL_QUANTIZE_H -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git 
a/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h new file mode 100644 index 000000000..813b1ec2c --- /dev/null +++ b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h @@ -0,0 +1,1568 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H +#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H + +#include <stdint.h> +#include <sys/types.h> + +#include <algorithm> +#include <cmath> +#include <cstring> +#include <functional> +#include <limits> +#include <memory> +#include <type_traits> + +#include "third_party/eigen3/Eigen/Core" +#include "fixedpoint/fixedpoint.h" +#include "ruy/profiler/instrumentation.h" // from @ruy +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/add.h" +#include "tensorflow/lite/kernels/internal/reference/add_n.h" +#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h" +#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h" +#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h" +#include "tensorflow/lite/kernels/internal/reference/binary_function.h" +#include "tensorflow/lite/kernels/internal/reference/cast.h" +#include "tensorflow/lite/kernels/internal/reference/ceil.h" +#include "tensorflow/lite/kernels/internal/reference/comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/concatenation.h" +#include "tensorflow/lite/kernels/internal/reference/conv.h" +#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h" +#include "tensorflow/lite/kernels/internal/reference/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/div.h" +#include "tensorflow/lite/kernels/internal/reference/elu.h" +#include "tensorflow/lite/kernels/internal/reference/exp.h" +#include "tensorflow/lite/kernels/internal/reference/fill.h" +#include "tensorflow/lite/kernels/internal/reference/floor.h" +#include "tensorflow/lite/kernels/internal/reference/floor_div.h" +#include "tensorflow/lite/kernels/internal/reference/floor_mod.h" +#include 
"tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/lite/kernels/internal/reference/gather.h" +#include "tensorflow/lite/kernels/internal/reference/hard_swish.h" +#include "tensorflow/lite/kernels/internal/reference/l2normalization.h" +#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" +#include "tensorflow/lite/kernels/internal/reference/log_softmax.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/neg.h" +#include "tensorflow/lite/kernels/internal/reference/pad.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/prelu.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/reference/quantize.h" +#include "tensorflow/lite/kernels/internal/reference/reduce.h" +#include "tensorflow/lite/kernels/internal/reference/requantize.h" +#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h" +#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h" +#include "tensorflow/lite/kernels/internal/reference/round.h" +#include "tensorflow/lite/kernels/internal/reference/softmax.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h" +#include "tensorflow/lite/kernels/internal/reference/strided_slice.h" +#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/sub.h" +#include "tensorflow/lite/kernels/internal/reference/tanh.h" +#include "tensorflow/lite/kernels/internal/reference/transpose.h" +#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h" +#include 
"tensorflow/lite/kernels/internal/strided_slice_logic.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/internal/types.h" +namespace tflite +{ + +namespace reference_ops +{ + +template <typename T> +inline void Relu(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T lower = 0; + const T clamped = val < lower ? lower : val; + output_data[i] = clamped; + } +} + +template <typename T> +inline void Relu1(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Relu1 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T upper = 1; + const T lower = -1; + const T clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; + } +} + +inline void Relu6(const RuntimeShape &input_shape, const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("Relu6 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const float val = input_data[i]; + const float upper = 6; + const float lower = 0; + const float clamped = val > upper ? upper : val < lower ? 
lower : val; + output_data[i] = clamped; + } +} + +template <typename T> +inline void ReluX(const tflite::ReluParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const int32 val = static_cast<int32_t>(input_data[i]); + int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset, + params.output_multiplier, + params.output_shift); + clamped = std::max(params.quantized_activation_min, clamped); + clamped = std::min(params.quantized_activation_max, clamped); + output_data[i] = static_cast<T>(clamped); + } +} + +template <typename T> +inline void ReluX(const tflite::ActivationParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + const T max_value = params.quantized_activation_max; + const T min_value = params.quantized_activation_min; + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T clamped = val > max_value ? max_value : val < min_value ? min_value : val; + output_data[i] = clamped; + } +} + +// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. 
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params, + const RuntimeShape &unswitched_input1_shape, + const uint8 *unswitched_input1_data, + const RuntimeShape &unswitched_input2_shape, + const uint8 *unswitched_input2_data, + const RuntimeShape &output_shape, uint8 *output_data) +{ + ArithmeticParams switched_params = unswitched_params; + switched_params.input1_offset = unswitched_params.input2_offset; + switched_params.input2_offset = unswitched_params.input1_offset; + + const bool use_unswitched = unswitched_params.broadcast_category == + tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast; + + const ArithmeticParams ¶ms = use_unswitched ? unswitched_params : switched_params; + const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data; + const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data; + + // Fivefold nested loops. The second input resets its position for each + // iteration of the second loop. The first input resets its position at the + // beginning of the fourth loop. The innermost loop is an elementwise Mul of + // sections of the arrays. 
+ uint8 *output_data_ptr = output_data; + const uint8 *input1_data_ptr = input1_data; + const uint8 *input2_data_reset = input2_data; + int y0 = params.broadcast_shape[0]; + int y1 = params.broadcast_shape[1]; + int y2 = params.broadcast_shape[2]; + int y3 = params.broadcast_shape[3]; + int y4 = params.broadcast_shape[4]; + for (int i0 = 0; i0 < y0; ++i0) + { + const uint8 *input2_data_ptr; + for (int i1 = 0; i1 < y1; ++i1) + { + input2_data_ptr = input2_data_reset; + for (int i2 = 0; i2 < y2; ++i2) + { + for (int i3 = 0; i3 < y3; ++i3) + { + MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr); + input2_data_ptr += y4; + output_data_ptr += y4; + } + input1_data_ptr += y4; + } + } + input2_data_reset = input2_data_ptr; + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16"); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + output_data[i] = unclamped_result.raw(); + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16Uint8"); + int32 output_offset = params.output_offset; + int32 output_activation_min = params.quantized_activation_min; + int32 output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result); + clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + +inline void Sub16(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16_t *input1_data, const RuntimeShape &input2_shape, + const int16_t *input2_data, const RuntimeShape &output_shape, + int16_t *output_data) +{ + ruy::profiler::ScopeLabel label("Sub/Int16"); + const int input1_shift = params.input1_shift; + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + const int16 output_activation_min = params.quantized_activation_min; + const int16 output_activation_max = params.quantized_activation_max; + + TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); + 
TFLITE_DCHECK_LE(input1_shift, 0); + TFLITE_DCHECK_LE(params.input2_shift, 0); + const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data; + const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift; + + if (input1_shift == 0) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(input_ready_scaled, scaled_input); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } + else + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(scaled_input, input_ready_scaled); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } +} + +template <typename Scalar> +void Pack(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("Pack"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + int inputs_count = params.inputs_count; + + int outer_size = 1; + for (int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = params.axis + 
1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + for (int i = 0; i < inputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + const Scalar *input_ptr = input_data[i] + copy_size * k; + int loc = k * inputs_count * copy_size + i * copy_size; + memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar)); + } + } +} + +template <typename Scalar> +void Unpack(const UnpackParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *const *output_datas) +{ + ruy::profiler::ScopeLabel label("Unpack"); + const int dimensions = input_shape.DimensionsCount(); + const int outputs_count = params.num_split; + + int outer_size = 1; + int axis = params.axis; + if (axis < 0) + { + axis += dimensions; + } + TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, dimensions); + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; ++i) + { + copy_size *= input_shape.Dims(i); + } + TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size); + + for (int i = 0; i < outputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + Scalar *output_ptr = output_datas[i] + copy_size * k; + int loc = k * outputs_count * copy_size + i * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template <typename Scalar> +void PackWithScaling(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const uint8 *const *input_data, const RuntimeShape &output_shape, + uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("PackWithScaling"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + const int32 *input_zeropoint = params.input_zeropoint; + const float *input_scale = params.input_scale; + int inputs_count = params.inputs_count; + const int32 output_zeropoint 
= params.output_zeropoint; + const float output_scale = params.output_scale; + + int outer_size = 1; + for (int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + Scalar *output_ptr = output_data; + const float inverse_output_scale = 1.f / output_scale; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale) + { + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + } + else + { + assert(false); + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + auto input_ptr = input_data[i]; + for (int j = 0; j < copy_size; ++j) + { + const int value = + static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint; + output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + +template <typename Scalar> +void DepthConcatenation(const ConcatenationParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, + Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("DepthConcatenation"); + auto params_copy = params; + params_copy.axis = 3; + Concatenation(params_copy, input_shapes, input_data, output_shape, output_data); +} + +inline void LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const float *input_data, const RuntimeShape &unextended_prev_activ_shape, + const float *prev_activ_data, const RuntimeShape &weights_shape, + const float *weights_data, const RuntimeShape &unextended_bias_shape, + const float *bias_data, const RuntimeShape &unextended_prev_state_shape, + const float *prev_state_data, + const 
RuntimeShape &unextended_output_state_shape, float *output_state_data, + const RuntimeShape &unextended_output_activ_shape, float *output_activ_data, + const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data, + const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data) +{ + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, + output_state_shape, 0, output_activ_shape, 0); + const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, 
prev_state_shape, 1, + output_state_shape, 1, output_activ_shape, 1); + const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2, + output_state_shape, 2, output_activ_shape, 2); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector<float const *> concat_input_arrays_data; + std::vector<RuntimeShape const *> concat_input_arrays_shapes; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_shapes.push_back(&input_shape); + concat_input_arrays_shapes.push_back(&prev_activ_shape); + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = concat_input_arrays_data.size(); + Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]), + concat_temp_shape, concat_temp_data); + + // Fully connected + tflite::FullyConnectedParams fc_params; + fc_params.float_activation_min = std::numeric_limits<float>::lowest(); + fc_params.float_activation_max = std::numeric_limits<float>::max(); + FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data, + bias_shape, bias_data, activ_temp_shape, activ_temp_data); + + // Memory state update (the LSTM 
"guts") + for (int b = 0; b < batches; ++b) + { + for (int w = 0; w < width; ++w) + { + for (int h = 0; h < height; ++h) + { + for (int c = 0; c < output_depth; ++c) + { + const float input_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)])); + const float new_input = + std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]); + const float forget_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)])); + const float output_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)])); + const float new_state = + input_gate * new_input + + forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)]; + output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state; + output_activ_data[Offset(output_activ_shape, b, h, w, c)] = + output_gate * std::tanh(new_state); + } + } + } + } +} + +// Quantized LSTM cell implementation. +// The quantization of the input, output arrays is as follows: +// - The input activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that is the natural interval for output +// activations (see next point) and these need to be concatenated together. +// We could accommodate different ranges by re-scaling, but we empirically +// found that setting the input activations range to be [-1, 127/128] in the +// first place, removing the need for re-scaling, greatly improves accuracy. +// - The output activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that the definition of a LSTM cell makes them +// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128] +// makes for simpler, more accurate fixed-point arithmetic. +// - The output-at-previous-timestep state array is obviously quantized as +// the output activations. 
+// - The internal LSTM memory (not the output-at-previous-timestep, the other +// internal state array) is int16-quantized and may use any power-of-two, +// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call +// StateIntegerBits below, see the below discussion of that template +// parameter ("The StateIntegerBits template parameter"). +// - The output of the internal fully-connected node is int16-quantized +// on the interval [-8, 8 * 32767/32768], the rationale for which is +// explained just below ("Why [-8, 8] for fully-connected output?"). +// +// +// === The StateIntegerBits template parameter === +// +// The StateIntegerBits template parameter controls the fixed-point format used +// to represent the internal memory of the LSTM cell (not the +// output-at-previous-timestep, the other internal state array). It's currently +// a template parameter so that the model can control that. The most typical +// value for StateIntegerBits is 4. Other plausible values are anywhere between +// 3 and 5. We might eventually standardize on a single supported value, e.g. 4, +// and drop that template parameter. The reason why it can't be a runtime +// parameter is that this controls the fixed-point format used, i.e. we need to +// generate actually different code based on it. In particular, we generate code +// for a fixed-point tanh() implementation for that format, which internally +// uses a fixed-point exp() implementation, which internally uses a +// barrel-shifter with a number of steps that depends on StateIntegerBits. +// Another consequence of that is that a higher value of StateIntegerBits +// results in a more expensive implementation (more barrel shifter steps +// needed). +// +// +// === Why [-8, 8] for fully-connected output? === +// +// This array is only fed to Logistic and Tanh functions, for which +// the quantized implementation will want to use fixed-point arithmetic, +// requiring a power-of-two representation interval. 
Thus, we should right +// away quantize this array to a power-of-two interval; otherwise, +// implementation will need to rescale that, losing any benefit that a tighter +// representation interval might otherwise yield, while introducing some +// numerical error and computational overhead. +// +// Now, Logistic and Tanh +// are nearly constant (nearly equal to their horizontal asymptotes) +// outside of a small bounded interval around 0: +// +// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4 +// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7 +// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14 +// +// From this, we see that clamping to [-4, 4] would be too inaccurate +// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision) +// while clamping to [-16, 16] would make no difference even in float32. +// However, for a fixed-point implementation in 16-bit integers, using 5 +// integer bits to represent the [-16, 16] range would leave only 11 +// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive +// representable values. Notice that this is higher than the +// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. +// Using [-8, 8] thus seems like the better compromise overall, enjoying +// an increment of 2.4e-4 between representable values and a worst-case +// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with +// [-16, 16]. +// +// Moreover, all other things being equal, it is nice to choose the narrower +// representation range, as that makes the implementation of fixed-point +// math functions a little cheaper (each integer bit requires an additional +// barrel-shifter step in the implementation of exp(-x)). That is further +// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make +// sense for 32-bit float or 32-bit fixed-point quantization, but we are +// aiming for 16-bit fixed-point quantization of these internal nodes here. 
+// +template <int StateIntegerBits> +inline void +LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape, + const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape, + const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape, + const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape, + const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape, + int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape, + uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape, + uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape, + int16 *activ_temp_data_int16, void *gemmlowp_context) +{ + (void)gemmlowp_context; // only used in optimized code. + int32 weights_zero_point = params.weights_zero_point; + int32 accum_multiplier = params.accum_multiplier; + int accum_shift = params.accum_shift; + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const 
RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + // Gather dimensions information, and perform consistency checks. + const int weights_dim_count = weights_shape.DimensionsCount(); + const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape, + output_state_shape, output_activ_shape); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); + const int fc_output_depth = + MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); + const int fc_accum_depth = total_input_depth; + TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); + + // Depth-concatenate prev_activ and input data together. 
+ uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8}; + const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data, + concat_temp_shape, concat_temp_data_uint8); + + // Implementation of the fully connected node inside the LSTM cell. + // The operands are 8-bit integers, the accumulators are internally 32bit + // integers, and the output is 16-bit fixed-point with 3 integer bits so + // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that + // is explained in the function comment above. + for (int b = 0; b < fc_batches; ++b) + { + for (int out_c = 0; out_c < fc_output_depth; ++out_c) + { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum = bias_data_int32[out_c]; + // Accumulation loop. + for (int d = 0; d < fc_accum_depth; ++d) + { + int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128; + int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point; + accum += input_val * weights_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, using 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift); + // Saturate, cast to int16, and store to the temporary activations array. + accum = std::max(-32768, std::min(32767, static_cast<int>(accum))); + activ_temp_data_int16[out_c + fc_output_depth * b] = accum; + } + } + + // Rest of the LSTM cell: tanh and logistic math functions, and some adds + // and muls, all done in 16-bit fixed-point. 
+ for (int b = 0; b < outer_size; ++b) + { + for (int c = 0; c < output_depth; ++c) + { + // Define the fixed-point data types that we will use here. All use + // int16 as the underlying integer type i.e. all are 16-bit fixed-point. + // They only differ by the number of integral vs. fractional bits, + // determining the range of values that they can represent. + // + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + // F3 uses 3 integer bits, range [-8, 8]. + // This is the range of the previous fully-connected node's output, + // which is our input here. + using F3 = gemmlowp::FixedPoint<std::int16_t, 3>; + // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits, + // 2^StateIntegerBits]. It's used to represent the internal state, whose + // number of integer bits is currently dictated by the model. See comment + // on the StateIntegerBits template parameter above. + using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>; + // Implementation of input gate, using fixed-point logistic function. + F3 input_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]); + F0 input_gate_output = gemmlowp::logistic(input_gate_input); + // Implementation of input modulation gate, using fixed-point tanh + // function. + F3 input_modulation_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]); + F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input); + // Implementation of forget gate, using fixed-point logistic function. + F3 forget_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]); + F0 forget_gate_output = gemmlowp::logistic(forget_gate_input); + // Implementation of output gate, using fixed-point logistic function. 
+ F3 output_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]); + F0 output_gate_output = gemmlowp::logistic(output_gate_input); + // Implementation of internal multiplication nodes, still in fixed-point. + F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output; + FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]); + FS prev_state_times_forget_state = forget_gate_output * prev_state; + // Implementation of internal addition node, saturating. + FS new_state = + gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation), + prev_state_times_forget_state); + // Implementation of last internal Tanh node, still in fixed-point. + // Since a Tanh fixed-point implementation is specialized for a given + // number or integer bits, and each specialization can have a substantial + // code size, and we already used above a Tanh on an input with 3 integer + // bits, and per the table in the above function comment there is no + // significant accuracy to be lost by clamping to [-8, +8] for a + // 3-integer-bits representation, let us just do that. This helps people + // porting this to targets where code footprint must be minimized. + F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); + F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); + // Store the new internal state back to memory, as 16-bit integers. + // Note: here we store the original value with StateIntegerBits, not + // the rescaled 3-integer-bits value fed to tanh. + output_state_data_int16[b * output_depth + c] = new_state.raw(); + // Down-scale the output activations to 8-bit integers, saturating, + // and store back to memory. 
+ int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8); + int16 clamped_output_activ = + std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ)); + output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ; + } + } +} + +template <typename Scalar> +void Split(const SplitParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape *const *output_shapes, Scalar *const *output_data) +{ + ruy::profiler::ScopeLabel label("Split"); + const int split_dimensions = input_shape.DimensionsCount(); + int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis; + int outputs_count = params.num_split; + TFLITE_DCHECK_LT(axis, split_dimensions); + + int64_t split_size = 0; + for (int i = 0; i < outputs_count; i++) + { + TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions); + for (int j = 0; j < split_dimensions; j++) + { + if (j != axis) + { + MatchingDim(*output_shapes[i], j, input_shape, j); + } + } + split_size += output_shapes[i]->Dims(axis); + } + TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + // For all output arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < split_dimensions; ++i) + { + base_inner_size *= input_shape.Dims(i); + } + + const Scalar *input_ptr = input_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < outputs_count; ++i) + { + const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar)); + input_ptr += copy_size; + } + } +} + +inline int NodeOffset(int b, int h, int w, int height, int width) +{ + return (b * height + h) * width + w; +} + +inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + 
const RuntimeShape &input_shape, const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) + { + for (int c = 0; c < depth; ++c) + { + const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range)); + const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range)); + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) + { + const float input_val = input_data[i * depth + input_c]; + accum += input_val * input_val; + } + const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta); + output_data[i * depth + c] = input_data[i * depth + c] * multiplier; + } + } +} + +inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; i++) + { + output_data[i] = static_cast<float>(input_data[i]); + } +} + +inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape, + const float *input_data, const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("FakeQuant"); + float rmin = op_params.minmax.min; + float rmax = op_params.minmax.max; + int num_bits = op_params.num_bits; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_DCHECK_LE(rmin, 0.0f); + TFLITE_DCHECK_GE(rmax, 0.0f); + TFLITE_DCHECK_LT(rmin, rmax); + + // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor. 
+ int quant_min = 0; + int quant_max = (1 << num_bits) - 1; + float nudged_min, nudged_max, nudged_scale; + NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size); +} + +// Common subroutine for both `GatherNd` and `GatherNdString`. +struct GatherNdHelperResult +{ + int n_slices; + int slice_size; + int indices_nd; + std::vector<int> dims_to_count; +}; + +// Returns common values being used on both `GatherNd` and `GatherNdString`. +inline GatherNdHelperResult GatherNdHelper(const RuntimeShape ¶ms_shape, + const RuntimeShape &indices_shape) +{ + GatherNdHelperResult ret; + ret.n_slices = 1; + ret.slice_size = 1; + const int indices_dims = indices_shape.DimensionsCount(); + ret.indices_nd = indices_shape.Dims(indices_dims - 1); + const int params_dims = params_shape.DimensionsCount(); + for (int i = 0; i < indices_dims - 1; ++i) + { + ret.n_slices *= indices_shape.Dims(i); + } + for (int i = ret.indices_nd; i < params_dims; ++i) + { + ret.slice_size *= params_shape.Dims(i); + } + + int remain_flat_size = params_shape.FlatSize(); + ret.dims_to_count = std::vector<int>(ret.indices_nd, 0); + for (int i = 0; i < ret.indices_nd; ++i) + { + ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i); + remain_flat_size = ret.dims_to_count[i]; + } + + return ret; +} + +template <typename ParamsT, typename IndicesT = int32> +inline void GatherNd(const RuntimeShape ¶ms_shape, const ParamsT *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, ParamsT *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNd"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + 
from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; + } + std::memcpy(output_data + i * res.slice_size, params_data + from_pos, + sizeof(ParamsT) * res.slice_size); + } +} + +#ifndef TF_LITE_STATIC_MEMORY +template <typename IndicesT = int32> +inline void GatherNdString(const RuntimeShape ¶ms_shape, const TfLiteTensor *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, TfLiteTensor *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNdString"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + DynamicBuffer buffer; + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; + } + for (int j = 0; j < res.slice_size; ++j) + { + buffer.AddString(GetString(params_data, from_pos + j)); + } + } + buffer.WriteToTensor(output_data, /*new_shape=*/nullptr); +} +#endif + +template <typename IndicesT, typename UpdatesT> +inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &updates_shape, const UpdatesT *updates_data, + const RuntimeShape &output_shape, UpdatesT *output_data) +{ + ruy::profiler::ScopeLabel label("ScatterNd"); + + int n_slices = 1; + int slice_size = 1; + const int outer_dims = indices_shape.DimensionsCount() - 1; + const int indices_nd = indices_shape.Dims(outer_dims); + const int updates_dims = updates_shape.DimensionsCount(); + for (int i = 0; i < outer_dims; ++i) + { + n_slices *= indices_shape.Dims(i); + } + for (int i = outer_dims; i < updates_dims; ++i) + { + slice_size *= updates_shape.Dims(i); + } + + int output_flat_size = output_shape.FlatSize(); + int remain_flat_size = output_flat_size; + std::vector<int> dims_to_count(indices_nd, 0); + for (int i = 0; i < indices_nd; ++i) + { + dims_to_count[i] = remain_flat_size / output_shape.Dims(i); + 
remain_flat_size = dims_to_count[i]; + } + + memset(output_data, 0, sizeof(UpdatesT) * output_flat_size); + for (int i = 0; i < n_slices; ++i) + { + int to_pos = 0; + for (int j = 0; j < indices_nd; ++j) + { + IndicesT idx = indices_data[i * indices_nd + j]; + TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j)); + to_pos += idx * dims_to_count[j]; + } + for (int j = 0; j < slice_size; j++) + { + output_data[to_pos + j] += updates_data[i * slice_size + j]; + } + } +} + +template <typename T> +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer) +{ + const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape); + TFLITE_DCHECK_LE(op_params.begin_count, 5); + TFLITE_DCHECK_LE(op_params.size_count, 5); + const int begin_count = op_params.begin_count; + const int size_count = op_params.size_count; + // We front-pad the begin and size vectors. + std::array<int, 5> start; + std::array<int, 5> stop; + for (int i = 0; i < 5; ++i) + { + int padded_i = 5 - i; + start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i]; + stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1) + ? 
ext_shape.Dims(i) + : start[i] + op_params.size[size_count - padded_i]; + } + + for (int i0 = start[0]; i0 < stop[0]; ++i0) + { + for (int i1 = start[1]; i1 < stop[1]; ++i1) + { + for (int i2 = start[2]; i2 < stop[2]; ++i2) + { + for (int i3 = start[3]; i3 < stop[3]; ++i3) + { + for (int i4 = start[4]; i4 < stop[4]; ++i4) + { + writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4)); + } + } + } + } + } +} + +template <typename T> +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + SequentialTensorWriter<T> writer(input_data, output_data); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template <typename T> +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output) +{ + SequentialTensorWriter<T> writer(input, output); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template <typename T> +void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto min_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template <typename T> +inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. 
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template <typename T> +void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto max_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template <typename T> +inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. + Maximum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template <typename T1, typename T2, typename T3> +void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>()); +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template <typename T1, typename T2, typename T3> +inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, + const RuntimeShape &input2_shape, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + // Drop shape of second input: not needed. 
+ ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template <typename D, typename T> +void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + int64_t flatsize; + // Allow select operator executions on mixed scalar tensors and one element + // tensors. + if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 && + input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) + { + flatsize = 1; + } + else + { + flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape); + } + for (int64_t i = 0; i < flatsize; ++i) + { + output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i]; + } +} + +template <typename D, typename T> +void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int64_t outer_size = input_condition_shape.FlatSize(); + int64_t inner_size; + if (input_condition_shape.DimensionsCount() == 0) + { + inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape); + } + else + { + TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size); + inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape); + } + + int64_t offset = 0; + for (int64_t i = 0; i < outer_size; i++) + { + const T *input_data = input_condition_data[i] ? 
input_x_data : input_y_data; + memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T)); + offset += inner_size; + } +} + +template <typename D, typename T> +void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4); + + const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape); + + NdArrayDesc<4> desc_condition; + NdArrayDesc<4> desc_x; + NdArrayDesc<4> desc_y; + NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape, + &desc_condition, &desc_x, &desc_y); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. 
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) + { + const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c); + const int x_index = SubscriptToIndex(desc_x, b, y, x, c); + const int y_index = SubscriptToIndex(desc_y, b, y, x, c); + output_data[Offset(extended_output_shape, b, y, x, c)] = + input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index]; + } + } + } + } +} + +template <typename D, typename T> +void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data, + T *output_data) +{ + const size_t size = input_condition_shape.FlatSize(); + if (size == 0) + { + // Dimension is zero, in which case we don't need to output. + return; + } + const size_t cond_rank = input_condition_shape.DimensionsCount(); + + std::vector<int> dims_to_count(cond_rank, 0); + int cur_flat_size = size; + for (int i = 0; i < cond_rank; ++i) + { + dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i); + cur_flat_size = dims_to_count[i]; + } + + int output_index = 0; + for (int i = 0; i < size; ++i) + { + if (input_condition_data[i]) + { + // Insert the coordinate of the current item (row major) into output. + int flat_index = i; + for (int j = 0; j < cond_rank; ++j) + { + int coord_j = flat_index / dims_to_count[j]; + output_data[output_index * cond_rank + j] = coord_j; + flat_index %= dims_to_count[j]; + } + output_index++; + } + } +} + +// For easy implementation, the indices is always a vector of size-4 vectors. 
+template <typename T, typename TI> +inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values, + T default_value, bool value_is_scalar, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int value_count = indices.size(); + + // First fill the output_data with default value. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; ++i) + { + output_data[i] = default_value; + } + + // Special handle for value is scalar case to avoid checking the boolean + // condition within the loop every time. + if (value_is_scalar) + { + for (int i = 0; i < value_count; ++i) + { + const std::vector<TI> &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = *values; // just use the first value. + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } + return; + } + + // Go through the values and indices to fill the sparse values. 
+ for (int i = 0; i < value_count; ++i) + { + const std::vector<TI> &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = values[i]; + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } +} + +template <typename T> +inline void Pow(const RuntimeShape &input1_shape, const T *input1_data, + const RuntimeShape &input2_shape, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = std::pow(input1_data[i], input2_data[i]); + } +} + +template <typename T> +inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data, + const RuntimeShape &unextended_input2_shape, const T *input2_data, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1, + &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) + { + for (int y = 0; y < output_shape.Dims(1); ++y) + { + for (int x = 0; x < output_shape.Dims(2); ++x) + { + for (int c = 0; c < output_shape.Dims(3); ++c) + { + auto out_idx = Offset(output_shape, b, y, x, c); + auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); + auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = std::pow(in1_val, in2_val); + } + } + } + } +} + +template <typename Scalar> +void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data, + 
const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("Reverse"); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_axis = input_shape.Dims(axis); + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_axis; ++j) + { + const int start_pos = (i * dims_at_axis + j) * copy_size; + Scalar *output_ptr = output_data + start_pos; + int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template <typename Scalar, typename TS> +void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim, + const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("ReverseSequence"); + + int outer_size = 1; + int outer_dim = std::min(batch_dim, seq_dim); + int medium_dim = std::max(batch_dim, seq_dim); + for (int i = 0; i < outer_dim; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int medium_size = 1; + for (int i = outer_dim + 1; i < medium_dim; ++i) + { + medium_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_outer_dim = input_shape.Dims(outer_dim); + const int dims_at_medium_dim = input_shape.Dims(medium_dim); + + Scalar *output_ptr; + if (batch_dim > seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim 
+ q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + int sl = seq_lengths[q] - 1; + if (j > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size; + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } + else if (batch_dim < seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + int sl = seq_lengths[j] - 1; + const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + if (q > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } +} + +template <typename T> +inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape); + + memset(output_data, 0, sizeof(T) * output_shape.FlatSize()); + + for (int i = 0; i < input_shape.Dims(0); i++) + { + int output_index = segment_ids_data[i]; + for (int j = 0; j < segment_flat_size; ++j) + { + output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j]; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H diff --git 
a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst index 428b15ee0..1e6c41ecc 100644 --- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst @@ -13,6 +13,7 @@ REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -48,6 +49,7 @@ REGISTER_KERNEL(PadV2) REGISTER_KERNEL(Pow) REGISTER_KERNEL(PRelu) REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(ReduceMax) REGISTER_KERNEL(Relu) REGISTER_KERNEL(Relu6) REGISTER_KERNEL(Reshape) @@ -55,6 +57,7 @@ REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) REGISTER_KERNEL(ReverseV2) REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) REGISTER_KERNEL(Slice) REGISTER_KERNEL(Softmax) REGISTER_KERNEL(SpaceToBatchND) diff --git a/compiler/luci-interpreter/pal/linux/PALreference_ops.h b/compiler/luci-interpreter/pal/linux/PALreference_ops.h new file mode 100644 index 000000000..825ebfe8e --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALreference_ops.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H +#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst index d134a6b95..f0df58db3 100644 --- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst +++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -12,6 +12,7 @@ REGISTER_KERNEL(Div) REGISTER_KERNEL(Elu) REGISTER_KERNEL(Exp) REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) REGISTER_KERNEL(Floor) REGISTER_KERNEL(FloorDiv) REGISTER_KERNEL(Equal) @@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape) REGISTER_KERNEL(ResizeBilinear) REGISTER_KERNEL(ResizeNearestNeighbor) REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) REGISTER_KERNEL(Softmax) REGISTER_KERNEL(SpaceToBatchND) REGISTER_KERNEL(SpaceToDepth) diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h index 15ff0327b..efa6b167e 100644 --- a/compiler/luci-interpreter/pal/mcu/PALDequantize.h +++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h @@ -18,7 +18,7 @@ #define LUCI_INTERPRETER_PAL_DEQUANTIZE_H #include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h index 6046789ae..effb85d54 100644 --- a/compiler/luci-interpreter/pal/mcu/PALQuantize.h +++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h @@ -17,7 +17,7 @@ #ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H #define LUCI_INTERPRETER_PAL_QUANTIZE_H -#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" +#include "PALreference_ops.h" namespace luci_interpreter_pal { diff --git 
a/compiler/luci-interpreter/pal/mcu/PALreference_ops.h b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h new file mode 100644 index 000000000..62c720937 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h @@ -0,0 +1,1556 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H +#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H + +#include <stdint.h> +#include <sys/types.h> + +#include <algorithm> +#include <cmath> +#include <cstring> +#include <functional> +#include <limits> +#include <memory> +#include <type_traits> + +#include "third_party/eigen3/Eigen/Core" +#include "fixedpoint/fixedpoint.h" +#include "ruy/profiler/instrumentation.h" // from @ruy +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/add.h" +#include "tensorflow/lite/kernels/internal/reference/add_n.h" +#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h" +#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h" +#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h" +#include "tensorflow/lite/kernels/internal/reference/binary_function.h" +#include 
"tensorflow/lite/kernels/internal/reference/cast.h" +#include "tensorflow/lite/kernels/internal/reference/ceil.h" +#include "tensorflow/lite/kernels/internal/reference/comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/concatenation.h" +#include "tensorflow/lite/kernels/internal/reference/conv.h" +#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h" +#include "tensorflow/lite/kernels/internal/reference/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/div.h" +#include "tensorflow/lite/kernels/internal/reference/elu.h" +#include "tensorflow/lite/kernels/internal/reference/exp.h" +#include "tensorflow/lite/kernels/internal/reference/fill.h" +#include "tensorflow/lite/kernels/internal/reference/floor.h" +#include "tensorflow/lite/kernels/internal/reference/floor_div.h" +#include "tensorflow/lite/kernels/internal/reference/floor_mod.h" +#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/lite/kernels/internal/reference/gather.h" +#include "tensorflow/lite/kernels/internal/reference/hard_swish.h" +#include "tensorflow/lite/kernels/internal/reference/l2normalization.h" +#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" +#include "tensorflow/lite/kernels/internal/reference/log_softmax.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/neg.h" +#include "tensorflow/lite/kernels/internal/reference/pad.h" +#include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "tensorflow/lite/kernels/internal/reference/prelu.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/reference/quantize.h" +#include "tensorflow/lite/kernels/internal/reference/reduce.h" +#include 
"tensorflow/lite/kernels/internal/reference/requantize.h" +#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h" +#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h" +#include "tensorflow/lite/kernels/internal/reference/round.h" +#include "tensorflow/lite/kernels/internal/reference/softmax.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h" +#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h" +#include "tensorflow/lite/kernels/internal/reference/strided_slice.h" +#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h" +#include "tensorflow/lite/kernels/internal/reference/sub.h" +#include "tensorflow/lite/kernels/internal/reference/tanh.h" +#include "tensorflow/lite/kernels/internal/reference/transpose.h" +#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h" +#include "tensorflow/lite/kernels/internal/strided_slice_logic.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/internal/types.h" +namespace tflite +{ + +namespace reference_ops +{ + +template <typename T> +inline void Relu(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T lower = 0; + const T clamped = val < lower ? lower : val; + output_data[i] = clamped; + } +} + +template <typename T> +inline void Relu1(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Relu1 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T upper = 1; + const T lower = -1; + const T clamped = val > upper ? upper : val < lower ? 
lower : val; + output_data[i] = clamped; + } +} + +inline void Relu6(const RuntimeShape &input_shape, const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("Relu6 (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const float val = input_data[i]; + const float upper = 6; + const float lower = 0; + const float clamped = val > upper ? upper : val < lower ? lower : val; + output_data[i] = clamped; + } +} + +template <typename T> +inline void ReluX(const tflite::ReluParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + const int32 val = static_cast<int32_t>(input_data[i]); + int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset, + params.output_multiplier, + params.output_shift); + clamped = std::max(params.quantized_activation_min, clamped); + clamped = std::min(params.quantized_activation_max, clamped); + output_data[i] = static_cast<T>(clamped); + } +} + +template <typename T> +inline void ReluX(const tflite::ActivationParams ¶ms, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)"); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + const T max_value = params.quantized_activation_max; + const T min_value = params.quantized_activation_min; + for (int i = 0; i < flat_size; ++i) + { + const T val = input_data[i]; + const T clamped = val > max_value ? max_value : val < min_value ? 
min_value : val; + output_data[i] = clamped; + } +} + +// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params, + const RuntimeShape &unswitched_input1_shape, + const uint8 *unswitched_input1_data, + const RuntimeShape &unswitched_input2_shape, + const uint8 *unswitched_input2_data, + const RuntimeShape &output_shape, uint8 *output_data) +{ + ArithmeticParams switched_params = unswitched_params; + switched_params.input1_offset = unswitched_params.input2_offset; + switched_params.input2_offset = unswitched_params.input1_offset; + + const bool use_unswitched = unswitched_params.broadcast_category == + tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast; + + const ArithmeticParams ¶ms = use_unswitched ? unswitched_params : switched_params; + const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data; + const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data; + + // Fivefold nested loops. The second input resets its position for each + // iteration of the second loop. The first input resets its position at the + // beginning of the fourth loop. The innermost loop is an elementwise Mul of + // sections of the arrays. 
+ uint8 *output_data_ptr = output_data; + const uint8 *input1_data_ptr = input1_data; + const uint8 *input2_data_reset = input2_data; + int y0 = params.broadcast_shape[0]; + int y1 = params.broadcast_shape[1]; + int y2 = params.broadcast_shape[2]; + int y3 = params.broadcast_shape[3]; + int y4 = params.broadcast_shape[4]; + for (int i0 = 0; i0 < y0; ++i0) + { + const uint8 *input2_data_ptr; + for (int i1 = 0; i1 < y1; ++i1) + { + input2_data_ptr = input2_data_reset; + for (int i2 = 0; i2 < y2; ++i2) + { + for (int i3 = 0; i3 < y3; ++i3) + { + MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr); + input2_data_ptr += y4; + output_data_ptr += y4; + } + input1_data_ptr += y4; + } + } + input2_data_reset = input2_data_ptr; + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16"); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + output_data[i] = unclamped_result.raw(); + } +} + +inline void Mul(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16 *input1_data, const RuntimeShape &input2_shape, + const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("Mul/Int16Uint8"); + int32 output_offset = params.output_offset; + int32 output_activation_min = params.quantized_activation_min; + int32 output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + for (int i = 0; i < flat_size; i++) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + + F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result); + clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + +inline void Sub16(const ArithmeticParams ¶ms, const RuntimeShape &input1_shape, + const int16_t *input1_data, const RuntimeShape &input2_shape, + const int16_t *input2_data, const RuntimeShape &output_shape, + int16_t *output_data) +{ + ruy::profiler::ScopeLabel label("Sub/Int16"); + const int input1_shift = params.input1_shift; + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + const int16 output_activation_min = params.quantized_activation_min; + const int16 output_activation_max = params.quantized_activation_max; + + TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); + 
TFLITE_DCHECK_LE(input1_shift, 0); + TFLITE_DCHECK_LE(params.input2_shift, 0); + const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data; + const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift; + + if (input1_shift == 0) + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(input_ready_scaled, scaled_input); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } + else + { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + for (int i = 0; i < flat_size; ++i) + { + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); + F0 result = SaturatingSub(scaled_input, input_ready_scaled); + const int16 raw_output = result.raw(); + const int16 clamped_output = + std::min(output_activation_max, std::max(output_activation_min, raw_output)); + output_data[i] = clamped_output; + } + } +} + +template <typename Scalar> +void Pack(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("Pack"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + int inputs_count = params.inputs_count; + + int outer_size = 1; + for (int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = params.axis + 
1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + for (int i = 0; i < inputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + const Scalar *input_ptr = input_data[i] + copy_size * k; + int loc = k * inputs_count * copy_size + i * copy_size; + memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar)); + } + } +} + +template <typename Scalar> +void Unpack(const UnpackParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *const *output_datas) +{ + ruy::profiler::ScopeLabel label("Unpack"); + const int dimensions = input_shape.DimensionsCount(); + const int outputs_count = params.num_split; + + int outer_size = 1; + int axis = params.axis; + if (axis < 0) + { + axis += dimensions; + } + TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, dimensions); + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; ++i) + { + copy_size *= input_shape.Dims(i); + } + TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size); + + for (int i = 0; i < outputs_count; ++i) + { + for (int k = 0; k < outer_size; k++) + { + Scalar *output_ptr = output_datas[i] + copy_size * k; + int loc = k * outputs_count * copy_size + i * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template <typename Scalar> +void PackWithScaling(const PackParams ¶ms, const RuntimeShape *const *input_shapes, + const uint8 *const *input_data, const RuntimeShape &output_shape, + uint8 *output_data) +{ + ruy::profiler::ScopeLabel label("PackWithScaling"); + const int dimensions = output_shape.DimensionsCount(); + int axis = params.axis; + const int32 *input_zeropoint = params.input_zeropoint; + const float *input_scale = params.input_scale; + int inputs_count = params.inputs_count; + const int32 output_zeropoint 
= params.output_zeropoint; + const float output_scale = params.output_scale; + + int outer_size = 1; + for (int i = 0; i < axis; i++) + { + outer_size *= output_shape.Dims(i); + } + int copy_size = 1; + for (int i = axis + 1; i < dimensions; i++) + { + copy_size *= output_shape.Dims(i); + } + TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size); + + Scalar *output_ptr = output_data; + const float inverse_output_scale = 1.f / output_scale; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale) + { + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + } + else + { + assert(false); + const float scale = input_scale[i] * inverse_output_scale; + const float bias = -input_zeropoint[i] * scale; + auto input_ptr = input_data[i]; + for (int j = 0; j < copy_size; ++j) + { + const int value = + static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint; + output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0)); + } + } + output_ptr += copy_size; + } + } +} + +template <typename Scalar> +void DepthConcatenation(const ConcatenationParams ¶ms, const RuntimeShape *const *input_shapes, + const Scalar *const *input_data, const RuntimeShape &output_shape, + Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("DepthConcatenation"); + auto params_copy = params; + params_copy.axis = 3; + Concatenation(params_copy, input_shapes, input_data, output_shape, output_data); +} + +inline void LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const float *input_data, const RuntimeShape &unextended_prev_activ_shape, + const float *prev_activ_data, const RuntimeShape &weights_shape, + const float *weights_data, const RuntimeShape &unextended_bias_shape, + const float *bias_data, const RuntimeShape &unextended_prev_state_shape, + const float *prev_state_data, + const 
RuntimeShape &unextended_output_state_shape, float *output_state_data, + const RuntimeShape &unextended_output_activ_shape, float *output_activ_data, + const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data, + const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data) +{ + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, + output_state_shape, 0, output_activ_shape, 0); + const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, 
prev_state_shape, 1, + output_state_shape, 1, output_activ_shape, 1); + const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2, + output_state_shape, 2, output_activ_shape, 2); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + + // Concatenate prev_activ and input data together + std::vector<float const *> concat_input_arrays_data; + std::vector<RuntimeShape const *> concat_input_arrays_shapes; + concat_input_arrays_data.push_back(input_data); + concat_input_arrays_data.push_back(prev_activ_data); + concat_input_arrays_shapes.push_back(&input_shape); + concat_input_arrays_shapes.push_back(&prev_activ_shape); + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = concat_input_arrays_data.size(); + Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]), + concat_temp_shape, concat_temp_data); + + // Fully connected + tflite::FullyConnectedParams fc_params; + fc_params.float_activation_min = std::numeric_limits<float>::lowest(); + fc_params.float_activation_max = std::numeric_limits<float>::max(); + FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data, + bias_shape, bias_data, activ_temp_shape, activ_temp_data); + + // Memory state update (the LSTM 
"guts") + for (int b = 0; b < batches; ++b) + { + for (int w = 0; w < width; ++w) + { + for (int h = 0; h < height; ++h) + { + for (int c = 0; c < output_depth; ++c) + { + const float input_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)])); + const float new_input = + std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]); + const float forget_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)])); + const float output_gate = + 1.f / + (1.f + + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)])); + const float new_state = + input_gate * new_input + + forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)]; + output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state; + output_activ_data[Offset(output_activ_shape, b, h, w, c)] = + output_gate * std::tanh(new_state); + } + } + } + } +} + +// Quantized LSTM cell implementation. +// The quantization of the input, output arrays is as follows: +// - The input activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that is the natural interval for output +// activations (see next point) and these need to be concatenated together. +// We could accommodate different ranges by re-scaling, but we empirically +// found that setting the input activations range to be [-1, 127/128] in the +// first place, removing the need for re-scaling, greatly improves accuracy. +// - The output activations are quantized as uint8 on the interval +// [-1, 127/128]. +// The rationale for that is that the definition of a LSTM cell makes them +// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128] +// makes for simpler, more accurate fixed-point arithmetic. +// - The output-at-previous-timestep state array is obviously quantized as +// the output activations. 
+// - The internal LSTM memory (not the output-at-previous-timestep, the other +// internal state array) is int16-quantized and may use any power-of-two, +// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call +// StateIntegerBits below, see the below discussion of that template +// parameter ("The StateIntegerBits template parameter"). +// - The output of the internal fully-connected node is int16-quantized +// on the interval [-8, 8 * 32767/32768], the rationale for which is +// explained just below ("Why [-8, 8] for fully-connected output?"). +// +// +// === The StateIntegerBits template parameter === +// +// The StateIntegerBits template parameter controls the fixed-point format used +// to represent the internal memory of the LSTM cell (not the +// output-at-previous-timestep, the other internal state array). It's currently +// a template parameter so that the model can control that. The most typical +// value for StateIntegerBits is 4. Other plausible values are anywhere between +// 3 and 5. We might eventually standardize on a single supported value, e.g. 4, +// and drop that template parameter. The reason why it can't be a runtime +// parameter is that this controls the fixed-point format used, i.e. we need to +// generate actually different code based on it. In particular, we generate code +// for a fixed-point tanh() implementation for that format, which internally +// uses a fixed-point exp() implementation, which internally uses a +// barrel-shifter with a number of steps that depends on StateIntegerBits. +// Another consequence of that is that a higher value of StateIntegerBits +// results in a more expensive implementation (more barrel shifter steps +// needed). +// +// +// === Why [-8, 8] for fully-connected output? === +// +// This array is only fed to Logistic and Tanh functions, for which +// the quantized implementation will want to use fixed-point arithmetic, +// requiring a power-of-two representation interval. 
Thus, we should right +// away quantize this array to a power-of-two interval; otherwise, +// implementation will need to rescale that, losing any benefit that a tighter +// representation interval might otherwise yield, while introducing some +// numerical error and computational overhead. +// +// Now, Logistic and Tanh +// are nearly constant (nearly equal to their horizontal asymptotes) +// outside of a small bounded interval around 0: +// +// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4 +// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7 +// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14 +// +// From this, we see that clamping to [-4, 4] would be too inaccurate +// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision) +// while clamping to [-16, 16] would make no difference even in float32. +// However, for a fixed-point implementation in 16-bit integers, using 5 +// integer bits to represent the [-16, 16] range would leave only 11 +// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive +// representable values. Notice that is higher than the +// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. +// Using [-8, 8] thus seems like the better compromise overall, enjoying +// an increment of 2.4e-4 between representable values and a worst-case +// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with +// [-16, 16]. +// +// Moreover, all other things being equal, it is nice to choose the narrower +// representation range, as that makes the implementation of fixed-point +// math functions a little cheaper (each integer bit requires an additional +// barrel-shifter step in the implementation of exp(-x)). That is further +// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make +// sense for 32-bit float or 32-bit fixed-point quantization, but we are +// aiming for 16-bit fixed-point quantization of these internal nodes here.
+// +template <int StateIntegerBits> +inline void +LstmCell(const LstmCellParams ¶ms, const RuntimeShape &unextended_input_shape, + const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape, + const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape, + const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape, + const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape, + const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape, + int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape, + uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape, + uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape, + int16 *activ_temp_data_int16, void *gemmlowp_context) +{ + (void)gemmlowp_context; // only used in optimized code. + int32 weights_zero_point = params.weights_zero_point; + int32 accum_multiplier = params.accum_multiplier; + int accum_shift = params.accum_shift; + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const 
RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + // Gather dimensions information, and perform consistency checks. + const int weights_dim_count = weights_shape.DimensionsCount(); + const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape, + output_state_shape, output_activ_shape); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); + const int total_input_depth = prev_activ_depth + input_depth; + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth); + const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); + const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); + const int fc_output_depth = + MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); + const int fc_accum_depth = total_input_depth; + TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); + + // Depth-concatenate prev_activ and input data together. 
+ uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8}; + const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data, + concat_temp_shape, concat_temp_data_uint8); + + // Implementation of the fully connected node inside the LSTM cell. + // The operands are 8-bit integers, the accumulators are internally 32bit + // integers, and the output is 16-bit fixed-point with 3 integer bits so + // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that + // is explained in the function comment above. + for (int b = 0; b < fc_batches; ++b) + { + for (int out_c = 0; out_c < fc_output_depth; ++out_c) + { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum = bias_data_int32[out_c]; + // Accumulation loop. + for (int d = 0; d < fc_accum_depth; ++d) + { + int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128; + int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point; + accum += input_val * weights_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, using 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift); + // Saturate, cast to int16, and store to the temporary activations array. + accum = std::max(-32768, std::min(32767, static_cast<int>(accum))); + activ_temp_data_int16[out_c + fc_output_depth * b] = accum; + } + } + + // Rest of the LSTM cell: tanh and logistic math functions, and some adds + // and muls, all done in 16-bit fixed-point. 
+ for (int b = 0; b < outer_size; ++b) + { + for (int c = 0; c < output_depth; ++c) + { + // Define the fixed-point data types that we will use here. All use + // int16 as the underlying integer type i.e. all are 16-bit fixed-point. + // They only differ by the number of integral vs. fractional bits, + // determining the range of values that they can represent. + // + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint<std::int16_t, 0>; + // F3 uses 3 integer bits, range [-8, 8]. + // This is the range of the previous fully-connected node's output, + // which is our input here. + using F3 = gemmlowp::FixedPoint<std::int16_t, 3>; + // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits, + // 2^StateIntegerBits]. It's used to represent the internal state, whose + // number of integer bits is currently dictated by the model. See comment + // on the StateIntegerBits template parameter above. + using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>; + // Implementation of input gate, using fixed-point logistic function. + F3 input_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]); + F0 input_gate_output = gemmlowp::logistic(input_gate_input); + // Implementation of input modulation gate, using fixed-point tanh + // function. + F3 input_modulation_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]); + F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input); + // Implementation of forget gate, using fixed-point logistic function. + F3 forget_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]); + F0 forget_gate_output = gemmlowp::logistic(forget_gate_input); + // Implementation of output gate, using fixed-point logistic function. 
+ F3 output_gate_input = + F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]); + F0 output_gate_output = gemmlowp::logistic(output_gate_input); + // Implementation of internal multiplication nodes, still in fixed-point. + F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output; + FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]); + FS prev_state_times_forget_state = forget_gate_output * prev_state; + // Implementation of internal addition node, saturating. + FS new_state = + gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation), + prev_state_times_forget_state); + // Implementation of last internal Tanh node, still in fixed-point. + // Since a Tanh fixed-point implementation is specialized for a given + // number or integer bits, and each specialization can have a substantial + // code size, and we already used above a Tanh on an input with 3 integer + // bits, and per the table in the above function comment there is no + // significant accuracy to be lost by clamping to [-8, +8] for a + // 3-integer-bits representation, let us just do that. This helps people + // porting this to targets where code footprint must be minimized. + F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); + F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); + // Store the new internal state back to memory, as 16-bit integers. + // Note: here we store the original value with StateIntegerBits, not + // the rescaled 3-integer-bits value fed to tanh. + output_state_data_int16[b * output_depth + c] = new_state.raw(); + // Down-scale the output activations to 8-bit integers, saturating, + // and store back to memory. 
+ int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8); + int16 clamped_output_activ = + std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ)); + output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ; + } + } +} + +template <typename Scalar> +void Split(const SplitParams ¶ms, const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape *const *output_shapes, Scalar *const *output_data) +{ + ruy::profiler::ScopeLabel label("Split"); + const int split_dimensions = input_shape.DimensionsCount(); + int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis; + int outputs_count = params.num_split; + TFLITE_DCHECK_LT(axis, split_dimensions); + + int64_t split_size = 0; + for (int i = 0; i < outputs_count; i++) + { + TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions); + for (int j = 0; j < split_dimensions; j++) + { + if (j != axis) + { + MatchingDim(*output_shapes[i], j, input_shape, j); + } + } + split_size += output_shapes[i]->Dims(axis); + } + TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + // For all output arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < split_dimensions; ++i) + { + base_inner_size *= input_shape.Dims(i); + } + + const Scalar *input_ptr = input_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < outputs_count; ++i) + { + const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar)); + input_ptr += copy_size; + } + } +} + +inline int NodeOffset(int b, int h, int w, int height, int width) +{ + return (b * height + h) * width + w; +} + +inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + 
const RuntimeShape &input_shape, const float *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + + for (int i = 0; i < outer_size; ++i) + { + for (int c = 0; c < depth; ++c) + { + const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range)); + const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range)); + float accum = 0.f; + for (int input_c = begin_input_c; input_c < end_input_c; ++input_c) + { + const float input_val = input_data[i * depth + input_c]; + accum += input_val * input_val; + } + const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta); + output_data[i * depth + c] = input_data[i * depth + c] * multiplier; + } + } +} + +inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data, + const RuntimeShape &output_shape, float *output_data) +{ + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; i++) + { + output_data[i] = static_cast<float>(input_data[i]); + } +} + +inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape, + const float *input_data, const RuntimeShape &output_shape, float *output_data) +{ + ruy::profiler::ScopeLabel label("FakeQuant"); + float rmin = op_params.minmax.min; + float rmax = op_params.minmax.max; + int num_bits = op_params.num_bits; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_DCHECK_LE(rmin, 0.0f); + TFLITE_DCHECK_GE(rmax, 0.0f); + TFLITE_DCHECK_LT(rmin, rmax); + + // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor. 
+ int quant_min = 0; + int quant_max = (1 << num_bits) - 1; + float nudged_min, nudged_max, nudged_scale; + NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale); + const int flat_size = MatchingFlatSize(input_shape, output_shape); + FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size); +} + +// Common subroutine for both `GatherNd` and `GatherNdString`. +struct GatherNdHelperResult +{ + int n_slices; + int slice_size; + int indices_nd; + std::vector<int> dims_to_count; +}; + +// Returns common values being used on both `GatherNd` and `GatherNdString`. +inline GatherNdHelperResult GatherNdHelper(const RuntimeShape ¶ms_shape, + const RuntimeShape &indices_shape) +{ + GatherNdHelperResult ret; + ret.n_slices = 1; + ret.slice_size = 1; + const int indices_dims = indices_shape.DimensionsCount(); + ret.indices_nd = indices_shape.Dims(indices_dims - 1); + const int params_dims = params_shape.DimensionsCount(); + for (int i = 0; i < indices_dims - 1; ++i) + { + ret.n_slices *= indices_shape.Dims(i); + } + for (int i = ret.indices_nd; i < params_dims; ++i) + { + ret.slice_size *= params_shape.Dims(i); + } + + int remain_flat_size = params_shape.FlatSize(); + ret.dims_to_count = std::vector<int>(ret.indices_nd, 0); + for (int i = 0; i < ret.indices_nd; ++i) + { + ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i); + remain_flat_size = ret.dims_to_count[i]; + } + + return ret; +} + +template <typename ParamsT, typename IndicesT = int32> +inline void GatherNd(const RuntimeShape ¶ms_shape, const ParamsT *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, ParamsT *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNd"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + 
from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; + } + std::memcpy(output_data + i * res.slice_size, params_data + from_pos, + sizeof(ParamsT) * res.slice_size); + } +} + +#ifndef TF_LITE_STATIC_MEMORY +template <typename IndicesT = int32> +inline void GatherNdString(const RuntimeShape ¶ms_shape, const TfLiteTensor *params_data, + const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &output_shape, TfLiteTensor *output_data) +{ + ruy::profiler::ScopeLabel label("GatherNdString"); + + const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); + DynamicBuffer buffer; + for (int i = 0; i < res.n_slices; ++i) + { + int from_pos = 0; + for (int j = 0; j < res.indices_nd; ++j) + { + from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; + } + for (int j = 0; j < res.slice_size; ++j) + { + buffer.AddString(GetString(params_data, from_pos + j)); + } + } + buffer.WriteToTensor(output_data, /*new_shape=*/nullptr); +} +#endif + +template <typename IndicesT, typename UpdatesT> +inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data, + const RuntimeShape &updates_shape, const UpdatesT *updates_data, + const RuntimeShape &output_shape, UpdatesT *output_data) +{ + ruy::profiler::ScopeLabel label("ScatterNd"); + + int n_slices = 1; + int slice_size = 1; + const int outer_dims = indices_shape.DimensionsCount() - 1; + const int indices_nd = indices_shape.Dims(outer_dims); + const int updates_dims = updates_shape.DimensionsCount(); + for (int i = 0; i < outer_dims; ++i) + { + n_slices *= indices_shape.Dims(i); + } + for (int i = outer_dims; i < updates_dims; ++i) + { + slice_size *= updates_shape.Dims(i); + } + + int output_flat_size = output_shape.FlatSize(); + int remain_flat_size = output_flat_size; + std::vector<int> dims_to_count(indices_nd, 0); + for (int i = 0; i < indices_nd; ++i) + { + dims_to_count[i] = remain_flat_size / output_shape.Dims(i); + 
remain_flat_size = dims_to_count[i]; + } + + memset(output_data, 0, sizeof(UpdatesT) * output_flat_size); + for (int i = 0; i < n_slices; ++i) + { + int to_pos = 0; + for (int j = 0; j < indices_nd; ++j) + { + IndicesT idx = indices_data[i * indices_nd + j]; + TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j)); + to_pos += idx * dims_to_count[j]; + } + for (int j = 0; j < slice_size; j++) + { + output_data[to_pos + j] += updates_data[i * slice_size + j]; + } + } +} + +template <typename T> +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer) +{ + const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape); + TFLITE_DCHECK_LE(op_params.begin_count, 5); + TFLITE_DCHECK_LE(op_params.size_count, 5); + const int begin_count = op_params.begin_count; + const int size_count = op_params.size_count; + // We front-pad the begin and size vectors. + std::array<int, 5> start; + std::array<int, 5> stop; + for (int i = 0; i < 5; ++i) + { + int padded_i = 5 - i; + start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i]; + stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1) + ? 
ext_shape.Dims(i) + : start[i] + op_params.size[size_count - padded_i]; + } + + for (int i0 = start[0]; i0 < stop[0]; ++i0) + { + for (int i1 = start[1]; i1 < stop[1]; ++i1) + { + for (int i2 = start[2]; i2 < stop[2]; ++i2) + { + for (int i3 = start[3]; i3 < stop[3]; ++i3) + { + for (int i4 = start[4]; i4 < stop[4]; ++i4) + { + writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4)); + } + } + } + } + } +} + +template <typename T> +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const T *input_data, const RuntimeShape &output_shape, T *output_data) +{ + SequentialTensorWriter<T> writer(input_data, output_data); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template <typename T> +inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape, + const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output) +{ + SequentialTensorWriter<T> writer(input, output); + return Slice(op_params, input_shape, output_shape, &writer); +} + +template <typename T> +void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto min_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template <typename T> +inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. 
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template <typename T> +void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, output_shape); + + auto max_value = input2_data[0]; + for (int i = 0; i < flat_size; i++) + { + output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i]; + } +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template <typename T> +inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, + const T *input2_data, const RuntimeShape &output_shape, T *output_data) +{ + // Drop shape of second input: not needed. + Maximum(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template <typename T1, typename T2, typename T3> +void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>()); +} + +// Convenience version that allows, for example, generated-code calls to be +// the same as other binary ops. +template <typename T1, typename T2, typename T3> +inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, + const RuntimeShape &input2_shape, const T3 *input2_data, + const RuntimeShape &output_shape, T2 *output_data) +{ + // Drop shape of second input: not needed. 
+ ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data); +} + +template <typename D, typename T> +void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + int64_t flatsize; + // Allow select operator executions on mixed scalar tensors and one element + // tensors. + if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 && + input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) + { + flatsize = 1; + } + else + { + flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape); + } + for (int64_t i = 0; i < flatsize; ++i) + { + output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i]; + } +} + +template <typename D, typename T> +void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int64_t outer_size = input_condition_shape.FlatSize(); + int64_t inner_size; + if (input_condition_shape.DimensionsCount() == 0) + { + inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape); + } + else + { + TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size); + inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape); + } + + int64_t offset = 0; + for (int64_t i = 0; i < outer_size; i++) + { + const T *input_data = input_condition_data[i] ? 
input_x_data : input_y_data; + memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T)); + offset += inner_size; + } +} + +template <typename D, typename T> +void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data, + const RuntimeShape &input_x_shape, const T *input_x_data, + const RuntimeShape &input_y_shape, const T *input_y_data, + const RuntimeShape &output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4); + + const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape); + + NdArrayDesc<4> desc_condition; + NdArrayDesc<4> desc_x; + NdArrayDesc<4> desc_y; + NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape, + &desc_condition, &desc_x, &desc_y); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. 
+ for (int b = 0; b < extended_output_shape.Dims(0); ++b) + { + for (int y = 0; y < extended_output_shape.Dims(1); ++y) + { + for (int x = 0; x < extended_output_shape.Dims(2); ++x) + { + for (int c = 0; c < extended_output_shape.Dims(3); ++c) + { + const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c); + const int x_index = SubscriptToIndex(desc_x, b, y, x, c); + const int y_index = SubscriptToIndex(desc_y, b, y, x, c); + output_data[Offset(extended_output_shape, b, y, x, c)] = + input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index]; + } + } + } + } +} + +template <typename D, typename T> +void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data, + T *output_data) +{ + const size_t size = input_condition_shape.FlatSize(); + if (size == 0) + { + // Dimension is zero, in which case we don't need to output. + return; + } + const size_t cond_rank = input_condition_shape.DimensionsCount(); + + std::vector<int> dims_to_count(cond_rank, 0); + int cur_flat_size = size; + for (int i = 0; i < cond_rank; ++i) + { + dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i); + cur_flat_size = dims_to_count[i]; + } + + int output_index = 0; + for (int i = 0; i < size; ++i) + { + if (input_condition_data[i]) + { + // Insert the coordinate of the current item (row major) into output. + int flat_index = i; + for (int j = 0; j < cond_rank; ++j) + { + int coord_j = flat_index / dims_to_count[j]; + output_data[output_index * cond_rank + j] = coord_j; + flat_index %= dims_to_count[j]; + } + output_index++; + } + } +} + +// For easy implementation, the indices is always a vector of size-4 vectors. 
+template <typename T, typename TI> +inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values, + T default_value, bool value_is_scalar, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int value_count = indices.size(); + + // First fill the output_data with default value. + const int num_elements = output_shape.FlatSize(); + for (int i = 0; i < num_elements; ++i) + { + output_data[i] = default_value; + } + + // Special handle for value is scalar case to avoid checking the boolean + // condition within the loop every time. + if (value_is_scalar) + { + for (int i = 0; i < value_count; ++i) + { + const std::vector<TI> &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = *values; // just use the first value. + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } + return; + } + + // Go through the values and indices to fill the sparse values. 
+ for (int i = 0; i < value_count; ++i) + { + const std::vector<TI> &index = indices[i]; + TFLITE_DCHECK_EQ(index.size(), 4); + const T value = values[i]; + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; + } +} + +template <typename T> +inline void Pow(const RuntimeShape &input1_shape, const T *input1_data, + const RuntimeShape &input2_shape, const T *input2_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = std::pow(input1_data[i], input2_data[i]); + } +} + +template <typename T> +inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data, + const RuntimeShape &unextended_input2_shape, const T *input2_data, + const RuntimeShape &unextended_output_shape, T *output_data) +{ + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1, + &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) + { + for (int y = 0; y < output_shape.Dims(1); ++y) + { + for (int x = 0; x < output_shape.Dims(2); ++x) + { + for (int c = 0; c < output_shape.Dims(3); ++c) + { + auto out_idx = Offset(output_shape, b, y, x, c); + auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); + auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = std::pow(in1_val, in2_val); + } + } + } + } +} + +template <typename Scalar> +void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data, + 
const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("Reverse"); + + int outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_axis = input_shape.Dims(axis); + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_axis; ++j) + { + const int start_pos = (i * dims_at_axis + j) * copy_size; + Scalar *output_ptr = output_data + start_pos; + int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size; + memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar)); + } + } +} + +template <typename Scalar, typename TS> +void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim, + const RuntimeShape &input_shape, const Scalar *input_data, + const RuntimeShape &output_shape, Scalar *output_data) +{ + ruy::profiler::ScopeLabel label("ReverseSequence"); + + int outer_size = 1; + int outer_dim = std::min(batch_dim, seq_dim); + int medium_dim = std::max(batch_dim, seq_dim); + for (int i = 0; i < outer_dim; ++i) + { + outer_size *= input_shape.Dims(i); + } + + int medium_size = 1; + for (int i = outer_dim + 1; i < medium_dim; ++i) + { + medium_size *= input_shape.Dims(i); + } + + int copy_size = 1; + for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i) + { + copy_size *= input_shape.Dims(i); + } + + const int dims_at_outer_dim = input_shape.Dims(outer_dim); + const int dims_at_medium_dim = input_shape.Dims(medium_dim); + + Scalar *output_ptr; + if (batch_dim > seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim 
+ q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + int sl = seq_lengths[q] - 1; + if (j > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size; + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } + else if (batch_dim < seq_dim) + { + for (int i = 0; i < outer_size; ++i) + { + for (int j = 0; j < dims_at_outer_dim; ++j) + { + const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size; + int sl = seq_lengths[j] - 1; + const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size; + for (int p = 0; p < medium_size; ++p) + { + for (int q = 0; q < dims_at_medium_dim; ++q) + { + const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size; + const Scalar *in_ptr = input_data + in_pos; + if (q > sl) + { + output_ptr = output_data + in_pos; + } + else + { + const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size; + output_ptr = output_data + out_pos; + } + memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar)); + } + } + } + } + } +} + +template <typename T> +inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data, + const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data, + const RuntimeShape &output_shape, T *output_data) +{ + const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape); + + memset(output_data, 0, sizeof(T) * output_shape.FlatSize()); + + for (int i = 0; i < input_shape.Dims(0); i++) + { + int output_index = segment_ids_data[i]; + for (int j = 0; j < segment_flat_size; ++j) + { + output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j]; + } + } +} + +} // namespace reference_ops +} // namespace tflite + +#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H diff --git 
a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h index 958fd4b74..6c0220c62 100644 --- a/compiler/luci-interpreter/src/core/KernelParams.h +++ b/compiler/luci-interpreter/src/core/KernelParams.h @@ -170,6 +170,11 @@ struct ResizeNearestNeighborParams bool half_pixel_centers; }; +struct ShapeParams +{ + loco::DataType out_type; +}; + struct SubParams { Activation activation; diff --git a/compiler/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-interpreter/src/kernels/Fill.cpp new file mode 100644 index 000000000..e09d6331a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Fill.h" +#include "kernels/Utils.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output) + : Kernel({dims, value}, {output}) +{ +} + +template <typename T> void Fill::configureShape() +{ + const auto dims_data = getTensorData<T>(dims()); + Shape output_shape(dims()->shape().dim(0)); + + for (int i = 0; i < output_shape.num_dims(); ++i) + { + T data = dims_data[i]; + if (data < 0) + throw std::runtime_error("Fill dimensions must be >= 0"); + + output_shape.dim(i) = data; + } + + output()->resize(output_shape); +} + +void Fill::configure() +{ + const auto dims_shape = dims()->shape(); + const auto value_shape = value()->shape(); + + // Make sure the 1st input tensor is 1-D + LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1); + + // Make sure the 1st input tensor is int32 or int64 + LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or + dims()->element_type() == DataType::S64); + + // Make sure the 2nd input tensor is a scalar + LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0) + + // Check zero point and scale for S16 and S8 + if (value()->element_type() == loco::DataType::S16 or + value()->element_type() == loco::DataType::S8) + { + LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale()); + LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point()); + + if (value()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(value()->zero_point() == 0); + } + // Resize output + switch (dims()->element_type()) + { + case DataType::S32: + configureShape<int32_t>(); + break; + case DataType::S64: + configureShape<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Fill::execute() const +{ + switch (output()->element_type()) + { + case DataType::S8: + tflite::reference_ops::Fill(getTensorShape(value()), 
getTensorData<int8_t>(value()), + getTensorShape(output()), getTensorData<int8_t>(output())); + break; + case DataType::S16: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()), + getTensorShape(output()), getTensorData<int16_t>(output())); + break; + case DataType::S32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()), + getTensorShape(output()), getTensorData<int32_t>(output())); + break; + case DataType::S64: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()), + getTensorShape(output()), getTensorData<int64_t>(output())); + break; + case DataType::FLOAT32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()), + getTensorShape(output()), getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Fill.h b/compiler/luci-interpreter/src/kernels/Fill.h new file mode 100644 index 000000000..184f0cb83 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FILL_H +#define LUCI_INTERPRETER_KERNELS_FILL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Fill : public Kernel +{ +public: + Fill(const Tensor *dims, const Tensor *value, Tensor *output); + + const Tensor *dims() const { return _inputs[0]; } + const Tensor *value() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void configureShape(); +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FILL_H diff --git a/compiler/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-interpreter/src/kernels/Fill.test.cpp new file mode 100644 index 000000000..cf56df507 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Fill.test.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Fill.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FillTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector<int32_t> dims_data = {2, 3}; + std::vector<T> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data); + + std::vector<int32_t> ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector<int32_t> dims_data = {2, 3}; + std::vector<float> value_data = {5}; + + int32_t zero_point = 0; + + if (DT == loco::DataType::S8) + zero_point = 1; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point, + value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + + std::vector<int32_t> ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, FillInt) +{ + // Run for int32_t input + runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get()); + // Run for int64_t input + runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get()); + // Run for int8_t input + runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get()); + // Run for int16_t input + runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get()); + + SUCCEED(); +} + +TEST_F(FillTest, FillFloat) +{ + Shape dims_shape{3}; + + std::vector<int64_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5}; + + std::vector<int32_t> ref_output_shape{2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, Invalid_Input_Shape_NEG) +{ + Shape dims_shape{1, 3}; + + std::vector<int32_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = 
makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FillTest, Invalid_Value_Shape_NEG) +{ + Shape dims_shape{3}; + + std::vector<int32_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp index 2fbeefce4..bae1eac70 100644 --- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp @@ -19,6 +19,8 @@ #include "kernels/Utils.h" +#include <limits> + namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-interpreter/src/kernels/Pack.cpp index 6fee93890..42aab330c 100644 --- a/compiler/luci-interpreter/src/kernels/Pack.cpp +++ b/compiler/luci-interpreter/src/kernels/Pack.cpp @@ -76,9 +76,8 @@ void Pack::configure() } } - if (t0->element_type() == DataType::S32 || t0->element_type() == DataType::U8 || - t0->element_type() == DataType::S8 || t0->element_type() == DataType::S16 || - t0->element_type() == DataType::S64) + if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 || + t0->element_type() == DataType::S16) { LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point()); LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale()); diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp index 
2404e4303..d16320b78 100644 --- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp @@ -38,18 +38,26 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes, std::vector<Tensor> tmp_inputs; for (int i = 0; i < input_datas.size(); i++) { - if (std::is_same<T, float>::value) + if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value || + std::is_same<T, int64_t>::value) { tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, "")); memory_manager->allocate_memory(tmp_inputs[i]); tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); } - else + else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) { tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, "")); memory_manager->allocate_memory(tmp_inputs[i]); tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); } + else + { + assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested"); + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } } for (int i = 0; i < input_datas.size(); i++) { @@ -57,10 +65,14 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes, } Tensor output_tensor = makeOutputTensor(element_type); - if (!std::is_same<T, float>::value) + if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) { output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128); } + else if (std::is_same<T, int16_t>::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f, 0); + } PackParams params{}; params.axis = axis; @@ -79,7 +91,7 @@ template <typename T> class PackTest : public ::testing::Test { }; -using DataTypes = ::testing::Types<uint8_t, float>; +using DataTypes = ::testing::Types<uint8_t, int8_t, 
int16_t, int32_t, int64_t, float>; TYPED_TEST_SUITE(PackTest, DataTypes); TYPED_TEST(PackTest, ThreeInputs) diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp index fe172884b..c07f6e310 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.cpp @@ -20,6 +20,8 @@ #include <tensorflow/lite/kernels/internal/reference/pad.h> +#include <limits> + namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp index e90469239..197cdaa69 100644 --- a/compiler/luci-interpreter/src/kernels/PadV2.cpp +++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp @@ -20,6 +20,8 @@ #include <tensorflow/lite/kernels/internal/reference/pad.h> +#include <limits> + namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp new file mode 100644 index 000000000..d58cd1563 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ReduceMax.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reduce.h> + +#include <stdexcept> +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +// Returns the number of axes that will be reduced. Removes duplicates. +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) +{ + int reduction_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims; + assert(current >= 0 && current < input_num_dims); + for (int j = 0; j < i; j++) + { + int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; + // This checks for duplicate axis + if (current == previous) + { + --reduction_count; + break; + } + } + } + return reduction_count; +} + +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, + bool keep_dims) +{ + int input_num_dims = input_shape.num_dims(); + if (input_num_dims == 0) + { + return Shape(0); + } + + if (keep_dims) + { + Shape output_shape(input_num_dims); + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + is_axis = true; + break; + } + } + if (is_axis) + { + output_shape.dim(idx) = 1; + } + else + { + output_shape.dim(idx) = input_shape.dim(idx); + } + } + return output_shape; + } + else + { + int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); + Shape output_shape(input_num_dims - num_reduce_axes); + int num_skip_axes = 0; + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + ++num_skip_axes; + is_axis = true; + break; 
+ } + } + if (!is_axis) + { + output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); + } + } + return output_shape; + } +} + +ReduceMax::ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams &params) + : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params) +{ +} + +void ReduceMax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + LUCI_INTERPRETER_CHECK(num_axes <= 4); + + // We compute shapes of outputs in configure, assuming that outputs have + // static shape + // TODO Support dynamic shape + Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); + output()->resize(output_shape); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); +} + +void ReduceMax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + // TODO Support quantized kernels + default: + throw std::runtime_error("Unsupported type."); + } +} + +void ReduceMax::evalFloat() const +{ + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + + int num_resolved_axis = 0; + LUCI_INTERPRETER_CHECK( + tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes, + getTensorData<int>(resolved_axes), &num_resolved_axis)); + + float init_value = std::numeric_limits<float>::lowest(); + 
tflite::reference_ops::ReduceGeneric<float>( + getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData<float>(output()), getTensorShape(output()).DimsData(), + output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value, + [](const float current, const float in) -> float { return (in > current) ? in : current; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.h b/compiler/luci-interpreter/src/kernels/ReduceMax.h new file mode 100644 index 000000000..25a66278a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H +#define LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class ReduceMax : public KernelWithParams<ReducerParams> +{ +public: + ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, const ReducerParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp new file mode 100644 index 000000000..ab688827b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ReduceMax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReduceMaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ReduceMaxTest, FloatNotKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{1, 0, -3, -3}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{23, 24}; + std::initializer_list<int32_t> ref_output_shape{2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ReduceMaxTest, FloatKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{0, 2}; + Tensor input_tensor = 
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{20, 22, 24}; + std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-interpreter/src/kernels/Shape.cpp new file mode 100644 index 000000000..0429fe1e5 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Shape.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params) + : KernelWithParams<ShapeParams>({input}, {output}, params) +{ +} + +void ShapeKernel::configure() +{ + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or + output()->element_type() == DataType::S64); + const auto input_shape = input()->shape(); + + Shape output_shape(1); + output_shape.dim(0) = input_shape.num_dims(); + + output()->resize(output_shape); +} + +void ShapeKernel::execute() const +{ + switch (params().out_type) + { + case DataType::S32: + evalInt<int32_t>(); + break; + case DataType::S64: + evalInt<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void ShapeKernel::evalInt() const +{ + const auto input_shape = input()->shape(); + + auto output_data = getTensorData<T>(output()); + + for (int i = 0; i < input_shape.num_dims(); ++i) + { + output_data[i] = input_shape.dim(i); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Shape.h b/compiler/luci-interpreter/src/kernels/Shape.h new file mode 100644 index 000000000..cfaadec91 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H +#define LUCI_INTERPRETER_KERNELS_SHAPE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ShapeKernel : public KernelWithParams<ShapeParams> +{ +public: + ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void evalInt() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H diff --git a/compiler/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-interpreter/src/kernels/Shape.test.cpp new file mode 100644 index 000000000..4763e016c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Shape.test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Shape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ShapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager) +{ + Shape input_shape{1, 3, 1, 3, 5}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(dataType); + + ShapeParams params{}; + params.out_type = dataType; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<T> ref_output_data{1, 3, 1, 3, 5}; + EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data); + + std::vector<int32_t> ref_output_shape{5}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ShapeTest, OutTypeInt) +{ + + // Run for int32_t output + runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get()); + // Run for int64_t output + runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get()); + + SUCCEED(); +} + +TEST_F(ShapeTest, Invalid_Output_Type_NEG) +{ + Shape input_shape{1, 3}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + ShapeParams params{}; + params.out_type = loco::DataType::FLOAT32; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp 
b/compiler/luci-interpreter/src/kernels/SplitV.cpp index 281988272..aa6820889 100644 --- a/compiler/luci-interpreter/src/kernels/SplitV.cpp +++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp @@ -43,14 +43,36 @@ void SplitV::configure() auto sizes_data = getTensorData<int32_t>(size_splits()); assert(size_splits()->shape().num_dims() == 1); + + int32_t sum = 0; + const auto num_dims_size_spits = size_splits()->shape().dim(0); + int32_t count_neg_dim = 0; + + for (int32_t i = 0; i < num_dims_size_spits - 1; ++i) + { + if (sizes_data[i] != -1) + { + sum += sizes_data[i]; + } + else + { + count_neg_dim++; + } + } + assert(count_neg_dim < 2); assert(size_splits()->shape().num_elements() == num_split); - assert(std::accumulate(sizes_data, sizes_data + num_split, 0) == - input()->shape().dim(_axis_value)); auto output_shape = input()->shape(); for (int32_t i = 0; i < num_split; ++i) { - output_shape.dim(_axis_value) = sizes_data[i]; + if (sizes_data[i] == -1) + { + output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum; + } + else + { + output_shape.dim(_axis_value) = sizes_data[i]; + } _outputs[i]->resize(output_shape); } } diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp index c6452cdb0..a8730d861 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp @@ -136,6 +136,11 @@ void StridedSlice::execute() const getTensorData<uint8_t>(input()), getTensorShape(output()), getTensorData<uint8_t>(output())); break; + case DataType::S32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData<int32_t>(input()), getTensorShape(output()), + getTensorData<int32_t>(output())); + break; default: throw std::runtime_error("Unsupported type."); } diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index 
dba39050c..40207090b 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -187,7 +187,7 @@ void GraphLoader::loadTensors() const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i)); if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node)) - throw std::runtime_error("Unknown Custom Node, yet."); + throw std::runtime_error("Unsupported Custom operator. " + node->name()); if (!isTensorProducingNode(node)) continue; diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp index decccaa1d..501e84752 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Add.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleAdd *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp index 0ee367748..f3ca55744 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleArgMax *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node); assert(node->arity() == 2); const 
Tensor *input = helper.getInputTensor(node->input()); const Tensor *axis = helper.getInputTensor(node->dimension()); diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp index efb011257..a8135706f 100644 --- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleAveragePool2D *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->value()); diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp index aae3dbab1..9da2f6d93 100644 --- a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node); assert(node->arity() == 2); const Tensor *lhs = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp index 33d0e2db6..ac6ebb30f 100644 --- a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp +++ 
b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleBatchToSpaceND *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp index 21ea5ceab..a16354c96 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleCast *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node); assert(node->arity() == 1); diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp index 7823a9967..ba2564ea2 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleConcatenation *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node); 
std::vector<const Tensor *> inputs(node->numValues()); for (uint32_t i = 0; i < node->numValues(); ++i) { diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp index b48d97d19..218165e20 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -25,9 +25,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleConv2D *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp index 0310fb23f..174946367 100644 --- a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleDepthToSpace *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp index db26ecf2e..8af1e3b58 100644 --- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -25,9 
+25,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleDepthwiseConv2D *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp index 4aae56469..787322e9b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node); const Tensor *input = helper.getInputTensor(node->input()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp index 56c2e98f2..0611dfdab 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Div.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleDiv *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node); assert(node->arity() == 
2); const Tensor *input1 = helper.getInputTensor(node->x()); const Tensor *input2 = helper.getInputTensor(node->y()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp index 98ee78be7..a79985e3b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleElu *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp index 649d9bfe9..59692883f 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp @@ -25,9 +25,7 @@ std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_ KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleEqual *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp index 411d142c3..30d11cb89 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node, KernelBuilderHelper 
&helper) { - const auto *node = dynamic_cast<const luci::CircleExp *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp new file mode 100644 index 000000000..3aefdf1c5 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Fill.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node); + assert(node->arity() == 2); + + const auto dims = helper.getInputTensor(node->dims()); + const auto value = helper.getInputTensor(node->value()); + auto output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Fill>(dims, value, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp index 6d8435f6c..e0a223116 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleFloor *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp index cae2e186e..a45d89e38 100644 --- a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleFloorDiv *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const 
luci::CircleFloorDiv *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp index 0b8ac44bd..b7b742b8a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleFullyConnected *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp index 9df9775c5..2ee2906e0 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node); assert(node->arity() == 2); const Tensor *params = helper.getInputTensor(node->params()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp index 3db11b840..80aa63cf0 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp @@ -24,9 
+24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleGreater *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp index dbe051d67..272f2843b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleGreaterEqual *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp index 5983f4d3b..3ac7d4941 100644 --- a/compiler/luci-interpreter/src/loader/nodes/If.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleIf *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node); auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node); 
assert(node->arity() == 1 + node->input_count()); assert(output_nodes.size() == static_cast<size_t>(node->output_count())); diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp index 0a8fb85e2..06031e5bc 100644 --- a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleInstanceNorm *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp index 05f920266..6e22e6d4e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleL2Normalize *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp index 0e70afafa..95b55896f 100644 --- a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp +++ 
b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleL2Pool2D *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->value()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp index 7b229ad0e..bbf5067b1 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLeakyRelu *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp index 81156f275..ae914ecc9 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Less.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLess *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = 
loco::must_cast<const luci::CircleLess *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp index 82141e5ae..f1b424b55 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLessEqual *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp index a12dce0a0..962ca2d7c 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp @@ -25,9 +25,7 @@ std::unique_ptr<Kernel> build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLocalResponseNormalization *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp index 6cf547aae..432204115 
100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLogSoftmax *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->logits()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp index 2c9549f71..bf3cb671a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLogicalAnd *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp index 3d327d6c4..fefcd9a06 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLogicalNot *>(circle_node); - if (node == nullptr) - throw 
std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp index 50566bb30..a416cb401 100644 --- a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLogicalOr *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp index e4160edb3..4a69deef1 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleLogistic *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp index 914f22838..f66a206ca 100644 --- 
a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleMaxPool2D *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->value()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp index dc50d6773..d0bff776a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleMaximum *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp index 97d91207f..0dec63e79 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleMean *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = 
loco::must_cast<const luci::CircleMean *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp index ff659524a..1a49c1090 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleMinimum *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp index ebf294583..b221b4574 100644 --- a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleMirrorPad *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp index 4f9da967d..f9984853a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp @@ -24,9 +24,7 @@ namespace 
luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleMul *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp index 23c00537b..9a9ecf991 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleNeg *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp index 8e5711fc1..3916a5854 100644 --- a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleNotEqual *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node); assert(node->arity() == 2); const Tensor *x = helper.getInputTensor(node->x()); diff --git 
a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp index e31601bf6..f3d700c95 100644 --- a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CirclePRelu *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp index 699472081..efc5850e0 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CirclePack *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node); assert(node->arity() == node->values_count()); std::vector<const Tensor *> inputs(node->values_count()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp index 770549295..67ce997a7 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CirclePad 
*>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp index 12deb15f0..e378a972a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CirclePadV2 *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp index b430bc94f..d32fc3dbb 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CirclePow *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp index fd9836345..cb36fb6da 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp +++ 
b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp @@ -24,9 +24,8 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node); + assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); Tensor *output = helper.getOutputTensor(node); diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp new file mode 100644 index 000000000..1a8522dd6 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ReduceMax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReduceMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReduceMax *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::ReduceMax>(input, axes, output, temp_index, resolved_axes, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp index d53a66a06..1d64c1c4e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleRelu *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const 
luci::CircleRelu *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp index f1b5d219b..e50cd2545 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleRelu6 *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->features()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp index 89e3ecebf..76ddd88a3 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleReshape *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->tensor()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp index dca56588d..dc2b88ad3 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp @@ -24,9 +24,7 @@ namespace 
luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleResizeBilinear *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp index d1ea19c0f..c7058ae78 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp @@ -25,9 +25,7 @@ std::unique_ptr<Kernel> build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleResizeNearestNeighbor *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp index ea00f5408..c1a7f5350 100644 --- a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleReverseV2 *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const 
luci::CircleReverseV2 *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->tensor()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp index ff87f435c..0714a5dba 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleRsqrt *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp index 89528d5ee..d172ef438 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp @@ -24,9 +24,8 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSVDF *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node); + assert(node->arity() == 5); const Tensor *input = helper.getInputTensor(node->input()); const Tensor *feature = helper.getInputTensor(node->weight_feature()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp new file mode 100644 index 000000000..d1edbc794 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung 
Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Shape.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node); + assert(node->arity() == 1); + + const auto input = helper.getInputTensor(node->input()); + auto output = helper.getOutputTensor(node); + + ShapeParams shape_params{}; + shape_params.out_type = node->out_type(); + + return std::make_unique<kernels::ShapeKernel>(input, output, shape_params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp index 741cd0806..60ac6417c 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSlice *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff 
--git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp index b15e4b6f3..f41f63f6f 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSoftmax *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->logits()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp index 91c237aa5..b6e6cf516 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSpaceToBatchND *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node); assert(node->arity() == 3); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp index 3cbbd9718..63fdb95ec 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const 
luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSpaceToDepth *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp index 32553ad5e..3f6d4a7df 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Split.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSplit *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node); auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node); assert(node->arity() == 2); assert(output_nodes.size() == static_cast<size_t>(node->num_split())); diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp index d78816447..0788822ca 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSplitV *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node); auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node); 
assert(node->arity() == 3); assert(output_nodes.size() == static_cast<size_t>(node->num_split())); diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp index 56dd986f1..b9843fe0b 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSqrt *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp index 43aadb969..0ad7c1772 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Square.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSquare *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp index 6a2717aa2..e4c6fd851 100644 --- a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> 
build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSquaredDifference *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp index 583ff9314..6885f8077 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSqueeze *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp index fe5fa7707..359b4e3e9 100644 --- a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleStridedSlice *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node); assert(node->arity() == 4); const Tensor *input = 
helper.getInputTensor(node->input()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp index bad4fbb13..a6252cb53 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleSub *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node); assert(node->arity() == 2); const Tensor *input1 = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp index f4255291b..a58ef60a8 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleTanh *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node); assert(node->arity() == 1); const Tensor *input = helper.getInputTensor(node->x()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp index 4e095fbbc..ea17d8311 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node 
= dynamic_cast<const luci::CircleTranspose *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node); assert(node->arity() == 2); const Tensor *input = helper.getInputTensor(node->a()); diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp index 1b954c35c..d773e301e 100644 --- a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleTransposeConv *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node); assert(node->arity() == 4); const Tensor *input_sizes = helper.getInputTensor(node->inputSizes()); diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp index 978c738c6..a1c0d323a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleUnpack *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node); auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node); assert(node->arity() == 1); assert(output_nodes.size() == static_cast<size_t>(node->num())); diff --git 
a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp index 284dc0c68..8fde6ec8a 100644 --- a/compiler/luci-interpreter/src/loader/nodes/While.cpp +++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp @@ -24,9 +24,7 @@ namespace luci_interpreter std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node, KernelBuilderHelper &helper) { - const auto *node = dynamic_cast<const luci::CircleWhile *>(circle_node); - if (node == nullptr) - throw std::runtime_error("wrong builder for operation"); + const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node); auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node); assert(node->arity() == node->input_count()); diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt index c8a2e12e1..642cf14a3 100644 --- a/compiler/luci-micro/CMakeLists.txt +++ b/compiler/luci-micro/CMakeLists.txt @@ -15,7 +15,7 @@ set(CMAKE_ARM_OPTIONS -DLUCI_STATIC=ON -DBUILD_CMSIS_NN_FUNCTIONS=ON -DTARGET_CPU=cortex-m7 - "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake" + "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake" "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu" "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}" "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}" diff --git a/compiler/luci-micro/luci-interpreter/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/CMakeLists.txt new file mode 100644 index 000000000..1f7acee87 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") +set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") +if (NOT LUCI_INTERPRETER_PAL_DIR) + set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux") 
+endif() + +set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst) + +if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX) + set(LUCI_INTERPRETER_SUFFIX "") +else() + set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX}) +endif() + +add_subdirectory(src) diff --git a/compiler/luci-micro/luci-interpreter/README.md b/compiler/luci-micro/luci-interpreter/README.md new file mode 100644 index 000000000..77ec5c81c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/README.md @@ -0,0 +1,158 @@ +# luci-interpreter + +`luci-interpreter` is an inference engine for neural networks represented in luci IR. +See `compiler/luci/lang` directory for details about IR. +You can find useful infrastructure, like importer/exporter, optimizations in `compiler/luci`. + +`luci-interpreter` provides: +- Basic inference functionality, input setters and output getters +- Interface for inspecting hidden interpreter state, like activation values during inference +- Customization mechanisms to fit the interpreter to specific platforms, like MCUs + +Public interface headers are placed in `luci-interpreter/include/luci_interpreter` directory + +## Basic usage + +Minimal usage includes: +- Setting input data +- Running inference +- Fetching inference results + +Interpreter object is reusable and can run multiple inferences. +Elements in tensors (input/output/internal) are stored contiguously and have C-like layout: +This means for tensor t=[[0, 1],[2, 3]], t[0,1] == 1. + +Input and output tensors have the same indexes as in original luci model. 
+ +**Usage example:** +``` c++ +// Note getTensorSize is a function that computes tensor size, +// it is not part of interpreter and should be implemented by user + +luci_interpreter::Interpreter interpreter(luci_module); + +// Set inputs +// assuming model has only one input and one output +const auto input_nodes = loco::input_nodes(module->graph()); + +const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]); +std::vector<char> input_data(getTensorSize(input_node)); +// Initialize input data here + +interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + +// Start inference +interpreter.interpret(); + +// Fetch inference results +const auto output_nodes = loco::output_nodes(module->graph()); +const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]); +std::vector<char> output_data(getTensorSize(output_node)); +interpreter.readOutputTensor(output_node, output_data.data(), output_data.size()); +``` + +## Inspecting intermediate state + +Interpreter provides interfaces to investigate internal state of interpreter during inference. + +This is done by "observer" mechanism: +- `Interpreter` class has `attachObserver` method, which takes pointer to `ExecutionObserver` object +- `ExecutionObserver` defines several callback methods user can override to inject custom code + +ExecutionObserver provides three callbacks: +- `postTensorWrite` checks contents of output tensor after operation execution +- `preOperatorExecute` notifies that interpreter is going to execute operation +- `postOperatorExecute` notifies that interpreter has finished execution of an operation + +See `luci-interpreter/include/luci_interpreter/Interpreter.h` for this interface details. 
+ +**Usage example:** +``` c++ +class CustomExecutionObserver: public luci_interpreter::ExecutionObserver +{ +public: + void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override + { + if (tensor->element_type() != loco::DataType::FLOAT32) + return; + for (int i = 0; i < tensor->shape().num_elements(); ++i) + std::cout << tensor->data<float>()[i] << ", "; + } + + // User observer can override only needed methods, + // others will inherit empty implementation from base observer. + + // void preOperatorExecute(const luci::CircleNode *node); + // void postOperatorExecute(const luci::CircleNode *node); +}; + +luci_interpreter::Interpreter interpreter(module); +CustomExecutionObserver observer; +interpreter.attachObserver(&observer); + +// initialize input_data +interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + +interpreter.interpret(); +``` + +## Customizing inference + +### Memory manager + +Interpreter provides a handle for altering default memory management mechanisms. + +This is done by `MemoryManager` interface, see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details. + +This header contains `IMemoryManager` abstract class which is responsible for allocation and deallocation of tensors' memory. + +User can construct an interpreter with one of predefined memory managers or their own custom memory manager. +Note that one memory manager could be shared between multiple interpreter instances, because an interpreter does not own the manager object. + +List of predefined memory managers: +- `SimpleMemoryManager` This is a simple wrapper around new/delete, default one. +- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests. +- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete. +- `StaticMemoryManager` Uses precomputed memory allocation plan. 
Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs). + +**SimpleMemoryManager usage example:** + +This is the default memory manager, so nothing needs to be selected to use it. +``` c++ +luci_interpreter::Interpreter interpreter(module); +``` + +**TestMemoryManager usage example:** + +``` c++ +luci_interpreter::TestMemoryManager mm; +luci_interpreter::Interpreter interpreter(module, &mm); +``` + +**BuddyMemoryManager usage example:** + +`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation. + +This allocator uses an external buffer as a memory pool. This makes it possible to use static memory arrays for allocations. + +Limitations +- Current implementation uses only the lower power-of-two bytes of the given buffer. + + For example, for a 1000-byte buffer, only the lower 512 bytes will be used. +- Current implementation can handle a memory pool of at most 4 gigabytes + +``` c++ + constexpr int buffer_size = 2048; + static uint8_t buffer[buffer_size]; + luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size); + luci_interpreter::Interpreter interpreter(module.get(), &memory_manager); +``` + +**StaticMemoryManager usage example:** +``` c++ +TBD when it is merged +``` + +## Further reading + +If you want to participate in development, please read `DEVELOPER.md` for SW architecture details. diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h new file mode 100644 index 000000000..205baa626 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h @@ -0,0 +1,144 @@ +/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/MemoryManager.h" + +#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H + +namespace luci_interpreter +{ + +class BuddyMemoryManager : public IMemoryManager +{ +public: + BuddyMemoryManager(uint8_t *memory_start, int32_t memSize); + + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + +private: + struct Block + { + Block *next_free; + bool is_free; + uint32_t size; + // debug field + Block *self; + }; + + Block *_start_block; + int32_t _num_blocks; + uint32_t _size; + Block *_free_blocks[32]{}; + + static int32_t lowerLog2(uint32_t val) + { + int32_t i = 0; + while (val >>= 1) + i++; + + return i; + } + + void addToBlocks(Block *block, int32_t l) + { + if (!block) + return; + + block->next_free = _free_blocks[l]; + _free_blocks[l] = block; + } + + void removeFromBlocks(const Block *block, int32_t l) + { + if (!block) + return; + + Block *tmp = _free_blocks[l]; + + if (block == tmp) + { + _free_blocks[l] = block->next_free; + return; + } + + while (tmp) + { + if (tmp->next_free == block) + { + tmp->next_free = block->next_free; + return; + } + + tmp = tmp->next_free; + } + } + + void divideBlock(Block *block, int32_t l) + { + int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block); + + removeFromBlocks(block, l); + + // there is no need to add to the free_blocks list here + block->is_free = true; + block->size = size; + block->self = block; + + Block *buddy; + buddy = (Block 
*)((uint8_t *)block + sizeof(Block) + size); + buddy->is_free = true; + buddy->size = size; + buddy->self = buddy; + + addToBlocks(buddy, l - 1); + } + + Block *mergeBlock(Block *block) + { + Block *buddy; + + const int32_t l = lowerLog2(block->size + sizeof(Block)); + + const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block); + buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block); + + if (!buddy->is_free || buddy->size != block->size) + return nullptr; + + if (block > buddy) + { + Block *x = block; + block = buddy; + buddy = x; + } + + removeFromBlocks(block, l); + removeFromBlocks(buddy, l); + + block->size = block->size * 2 + sizeof(Block); + block->is_free = true; + block->self = block; + + addToBlocks(block, l + 1); + + return block; + } +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h new file mode 100644 index 000000000..375b1ae20 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ +#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ + +#include <luci/Import/GraphBuilderRegistry.h> + +namespace luci_interpreter +{ + +/** + * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from + * model's file. + * + * @warning Use this source only in case when model's buffer alive longer than Interpreter. + */ +std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying(); + +} // namespace luci_interpreter + +#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__ diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h new file mode 100644 index 000000000..8e2f457a5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_INTERPRETER_H +#define LUCI_INTERPRETER_INTERPRETER_H + +#include "luci_interpreter/core/Tensor.h" + +#include <luci/IR/Nodes/CircleInput.h> +#include <luci/IR/Nodes/CircleOutput.h> + +#include "luci_interpreter/MemoryManager.h" +#include <luci/IR/Module.h> + +#include <memory> +#include <vector> +#include <unordered_map> + +namespace luci_interpreter +{ + +class ExecutionObserver +{ +public: + virtual ~ExecutionObserver(); + + // Called when the value of a tensor has been updated during execution. + virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor); + + // Called before / after executing an operator. + // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput, + // CircleConst and Circle*Out). + virtual void preOperatorExecute(const luci::CircleNode *node); + virtual void postOperatorExecute(const luci::CircleNode *node); +}; + +class Interpreter +{ +public: + explicit Interpreter(const luci::Module *module); + + explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager); + + ~Interpreter(); + + void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size); + + void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size); + + void interpret(); + + void attachObserver(ExecutionObserver *observer); + + const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; } + +private: + // _default_memory_manager should be before _runtime_module due to + // the order of deletion in the destructor + std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr; + std::unique_ptr<class RuntimeModule> _runtime_module; + + // Observer functionality support. 
+ std::unique_ptr<struct RuntimeToIR> _runtime_to_ir; + std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor; + std::unique_ptr<class EventNotifier> _event_notifier; + std::vector<ExecutionObserver *> _observers; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_INTERPRETER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h new file mode 100644 index 000000000..f32c52095 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_MEMORY_MANAGER_H + +#include "luci_interpreter/core/DataType.h" +#include "luci_interpreter/core/Tensor.h" + +namespace luci_interpreter +{ + +class IMemoryManager +{ +public: + virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0; + virtual void release_memory(luci_interpreter::Tensor &tensor) = 0; + + virtual ~IMemoryManager() = default; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h new file mode 100644 index 000000000..658a1c609 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ + +class SimpleMemoryManager : public IMemoryManager +{ +public: + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h new file mode 100644 index 000000000..ded7bde79 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ + +// Used for allocations in static buffer, using offsets defined in luci model. 
+class StaticMemoryManager : public IMemoryManager +{ +public: + StaticMemoryManager() = delete; + + explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr) + { /* Do nothing */ + } + + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + +private: + // Stores a pointer to the beginning of the allocated memory buffer. + uint8_t *_buffer_ptr; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h new file mode 100644 index 000000000..397bbed76 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ +// Memory Manager for using in kernels tests. This eliminates the need to manually delete the +// allocated memory in tests. This mem_manager remembers all its allocations and in destructor +// delete all allocations. 
+class TestMemoryManager : public IMemoryManager +{ +public: + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + + ~TestMemoryManager() override + { + for (auto allocation : allocations) + { + delete[] allocation; + } + } + +private: + std::vector<uint8_t *> allocations; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H diff --git a/compiler/circledump/include/circleread/Model.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h index 234db8b4c..27bf719b5 100644 --- a/compiler/circledump/include/circleread/Model.h +++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h @@ -14,30 +14,23 @@ * limitations under the License. */ -#ifndef __CIRCLEREAD_MODEL_H__ -#define __CIRCLEREAD_MODEL_H__ +#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H +#define LUCI_INTERPRETER_CORE_DATATYPE_H -#include <mio/circle/schema_generated.h> +#include <loco/IR/DataType.h> +#include <loco/IR/DataTypeTraits.h> -#include <memory> +#include <cstddef> -namespace circleread +namespace luci_interpreter { -struct Model -{ - virtual ~Model() = default; +using DataType = loco::DataType; - virtual const ::circle::Model *model(void) const = 0; -}; +template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>; -/** - * @brief Load Circle model (as a raw Model) from a given path - * - * @note May return a nullptr - */ -std::unique_ptr<Model> load_circle(const std::string &path); +inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); } -} // namespace circleread +} // namespace luci_interpreter -#endif // __CIRCLEREAD_MODEL_H__ +#endif // LUCI_INTERPRETER_CORE_DATATYPE_H diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h new file mode 100644 index 000000000..bb9ff6d4a --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_TENSOR_H +#define LUCI_INTERPRETER_CORE_TENSOR_H + +#include "luci_interpreter/core/DataType.h" + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <vector> + +namespace luci_interpreter +{ + +class Shape +{ +public: + explicit Shape(int rank) : _dims(rank, 0) {} + + Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {} + + int num_dims() const { return _dims.size(); } + + int32_t dim(int i) const + { + assert(i >= 0 && i < static_cast<int>(_dims.size())); + return _dims[i]; + } + + int32_t &dim(int i) + { + assert(i >= 0 && i < static_cast<int>(_dims.size())); + return _dims[i]; + } + + int32_t num_elements() const + { + int32_t result = 1; + for (const int32_t dim : _dims) + { + result *= dim; + } + return result; + } + + bool operator==(const Shape &other) const { return _dims == other._dims; } + + bool operator!=(const Shape &other) const { return !operator==(other); } + +private: + std::vector<int32_t> _dims; +}; + +// Tensor affine quantization parameters. 
+// +// The relationship between real and quantized values: +// real_value = (quantized_value - zero_point) * scale +// +// In per-tensor case, 'scale' and 'zero_point' are one element each. +// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size +// of the quantized dimension. +// +// Note that due to historical and performance reasons, per-tensor quantization uses unsigned +// integer types, while per-channel uses signed types assuming 'zero_point' == 0. +struct AffineQuantization +{ + std::vector<float> scale; + std::vector<int32_t> zero_point; + int32_t quantized_dimension; +}; + +class Tensor +{ +public: + Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name); + + DataType element_type() const { return _element_type; } + + const Shape &shape() const { return _shape; } + + float scale() const + { + assert(_quantization.scale.size() == 1); + return _quantization.scale[0]; + } + + int32_t zero_point() const + { + assert(_quantization.zero_point.size() == 1); + return _quantization.zero_point[0]; + } + + const std::vector<float> &scales() const { return _quantization.scale; } + + const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; } + + int32_t quantized_dimension() const { return _quantization.quantized_dimension; } + + template <typename T> const T *data() const + { + static_assert(std::is_same<uint8_t, char>::value or + std::is_same<uint8_t, unsigned char>::value); + return reinterpret_cast<const T *>(_data); + } + + template <typename T> T *data() + { + static_assert(std::is_same<uint8_t, char>::value or + std::is_same<uint8_t, unsigned char>::value); + return reinterpret_cast<T *>(_data); + } + + const std::string &name() const { return _name; } + + void readData(void *data_ptr, size_t data_size) const; + + void writeData(const void *data_ptr, size_t data_size); + + void resize(const Shape &new_shape); + + void set_data_buffer(uint8_t *buffer) + { + if 
(buffer == nullptr) + { + _data_allocated = false; + } + else + { + _data_allocated = true; + } + _data = buffer; + } + + bool is_observable() const { return _is_observable; } + + void set_observable(bool value) { _is_observable = value; } + + bool is_allocatable() const { return _is_allocatable; } + + void set_allocatable(bool value) { _is_allocatable = value; } + + bool is_data_allocated() const { return _data_allocated; } + + int32_t get_offset() const { return _offset; } + + void set_offset(int32_t offset) { _offset = offset; } + +private: + DataType _element_type; + Shape _shape; + AffineQuantization _quantization; + uint8_t *_data; + std::string _name; + bool _data_allocated; + // Write of tensor is reported to registered Observers only if this tensor is observable + // This is needed for tensors used in kernel implementation, but not present in original model. + bool _is_observable = true; + // Memory manager is called for tensor only if it is "allocatable". + // Kernel configuration could disable allocation of some tensors if they are not needed for + // particular operation. + bool _is_allocatable = true; + // Used by static memory manager. + // Stores the offset from the beginning of the allocated memory buffer. 
+ int32_t _offset = -1; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_TENSOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst new file mode 100644 index 000000000..f0df58db3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst @@ -0,0 +1,62 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) 
+REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(While) diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h new file mode 100644 index 000000000..21e63296d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h> + +namespace luci_interpreter_pal +{ +template <typename T1, typename T2, typename T3> +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater<T1> cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h new file mode 100644 index 000000000..a274afb7e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> +#include <arm_nn_types.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void AveragePool(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool<int8_t>(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + assert(scratchpad_data != nullptr); + + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + assert(batches == 1); + + const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3); + + cmsis_nn_dims input_dims; + input_dims.n = 1; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = depth; + + cmsis_nn_dims output_dims; + output_dims.n = 1; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = depth; + + cmsis_nn_pool_params pool_params; + pool_params.stride.h = params.stride_height; + pool_params.stride.w = params.stride_width; + pool_params.padding.h = params.padding_values.height; + pool_params.padding.w = 
params.padding_values.width; + pool_params.activation.min = params.quantized_activation_min; + pool_params.activation.max = params.quantized_activation_max; + + cmsis_nn_dims filter_dims; + filter_dims.n = 1; + filter_dims.h = params.filter_height; + filter_dims.w = params.filter_width; + filter_dims.c = 1; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims, + output_data); + assert(res == ARM_MATH_SUCCESS); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + if (input_data_type == luci_interpreter::DataType::S8) + { + assert(input_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + + const int32_t output_width = output_shape.Dims(2); + const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3); + + const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth); + auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type)); + + luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h new file mode 100644 index 000000000..4dd77ffdc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H + +#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h new file mode 100644 index 000000000..cfb84ea60 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/conv.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h> +#include <arm_nn_types.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + 
scratchpad_data, nullptr); +} + +static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) +{ + if (scratchpad_data) + { + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + assert(conv_params.dilation.h == 1); + assert(conv_params.dilation.w == 1); + + conv_params.input_offset = params.input_offset; + conv_params.output_offset = params.output_offset; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = params.padding_values.height; + conv_params.padding.w = params.padding_values.width; + conv_params.activation.min = params.quantized_activation_min; + conv_params.activation.max = params.quantized_activation_max; + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = const_cast<int32_t *>(mult); + quant_params.shift = const_cast<int32_t *>(shifts); + + assert(conv_params.activation.min <= conv_params.activation.max); + assert(input_shape.DimensionsCount() == 4); + assert(filter_shape.DimensionsCount() == 4); + assert(output_shape.DimensionsCount() == 4); + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); 
+ input_dims.c = input_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = input_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size = scratchpad_shape.Dims(0); + + auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, + &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); + } + else + { + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); + } +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams &params, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + cmsis_nn_conv_params conv_params; + conv_params.dilation.h = params.dilation_height_factor; + conv_params.dilation.w = params.dilation_width_factor; + + if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 && + conv_params.dilation.w == 1) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3); + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + const int32_t output_height = output_shape.Dims(1); + const int32_t 
output_width = output_shape.Dims(2); + + conv_params.input_offset = params.input_offset; + conv_params.output_offset = params.output_offset; + conv_params.stride.h = params.stride_height; + conv_params.stride.w = params.stride_width; + conv_params.padding.h = params.padding_values.height; + conv_params.padding.w = params.padding_values.width; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = output_depth; + filter_dims.h = filter_height; + filter_dims.w = filter_width; + filter_dims.c = input_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = output_height; + output_dims.w = output_width; + output_dims.c = output_depth; + + const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims, + &filter_dims, &output_dims); + + luci_interpreter::Shape scratchpad_shape{buf_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h new file mode 100644 index 000000000..8463e571e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h new file mode 100644 index 000000000..120dcd803 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel<int8_t>( + const tflite::DepthwiseParams &params, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + if (scratchpad_data) + { + cmsis_nn_dw_conv_params 
dw_conv_params; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + assert(dw_conv_params.dilation.h == 1); + assert(dw_conv_params.dilation.w == 1); + + dw_conv_params.input_offset = params.input_offset; + dw_conv_params.output_offset = params.output_offset; + dw_conv_params.stride.h = params.stride_height; + dw_conv_params.stride.w = params.stride_width; + dw_conv_params.padding.h = params.padding_values.height; + dw_conv_params.padding.w = params.padding_values.width; + + dw_conv_params.activation.min = params.quantized_activation_min; + dw_conv_params.activation.max = params.quantized_activation_max; + dw_conv_params.ch_mult = params.depth_multiplier; + + cmsis_nn_per_channel_quant_params quant_params; + // The CMSIS-NN per-channel struct takes arrays with one entry per output channel, so + // forward the caller's output_multiplier/output_shift arrays. The address of a single + // local scalar would be read past its end. const_cast: the CMSIS fields are non-const. + quant_params.multiplier = const_cast<int32_t *>(output_multiplier); + quant_params.shift = const_cast<int32_t *>(output_shift); + + assert(dw_conv_params.activation.min <= dw_conv_params.activation.max); + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3); + if (bias_data) + { + assert(bias_shape.FlatSize() == output_depth); + } + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_shape.Dims(0); + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + cmsis_nn_context ctx; + ctx.buf = scratchpad_data; + ctx.size 
= scratchpad_shape.Dims(0); + + auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims, + input_data, &filter_dims, filter_data, &bias_dims, + bias_data, &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); + } + else + { + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); + } +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams &params, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.dilation.h = params.dilation_height_factor; + dw_conv_params.dilation.w = params.dilation_width_factor; + + if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 && + dw_conv_params.dilation.w == 1) + { + const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3); + + cmsis_nn_dims input_dims; + input_dims.n = batch_size; + input_dims.h = input_shape.Dims(1); + input_dims.w = input_shape.Dims(2); + input_dims.c = input_shape.Dims(3); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_shape.Dims(0); + filter_dims.h = filter_shape.Dims(1); + filter_dims.w = filter_shape.Dims(2); + filter_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batch_size; + output_dims.h = output_shape.Dims(1); + output_dims.w = output_shape.Dims(2); + output_dims.c = output_depth; + + const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &output_dims); + + auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type)); + 
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h new file mode 100644 index 000000000..15ff0327b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ + +template <typename T> +static inline void Dequantize(tflite::DequantizationParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void Dequantize(tflite::DequantizationParams &params, + const tflite::RuntimeShape &input_shape, const uint8_t *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h new file mode 100644 index 000000000..4089d0a0c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include <tensorflow/lite/kernels/internal/reference/elu.h> + +namespace luci_interpreter_pal +{ + +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h new file mode 100644 index 000000000..32e905761 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void FullyConnected(const tflite::FullyConnectedParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected<int8_t>(const tflite::FullyConnectedParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + assert(output_shape.DimensionsCount() == 2); + + const int batches = output_shape.Dims(0); + const int output_depth = output_shape.Dims(1); + + const int filter_dim_count = filter_shape.DimensionsCount(); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + + cmsis_nn_fc_params fc_params; + fc_params.input_offset = params.input_offset; + fc_params.output_offset = params.output_offset; + fc_params.filter_offset = params.weights_offset; + fc_params.activation.min = params.quantized_activation_min; + fc_params.activation.max = params.quantized_activation_max; + + cmsis_nn_per_tensor_quant_params quant_params; + 
quant_params.multiplier = params.output_multiplier; + quant_params.shift = params.output_shift; + + cmsis_nn_dims input_dims; + input_dims.n = batches; + input_dims.h = 1; + input_dims.w = 1; + input_dims.c = accum_depth; + + cmsis_nn_dims filter_dims; + filter_dims.n = accum_depth; + filter_dims.h = 1; + filter_dims.w = 1; + filter_dims.c = output_depth; + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_depth; + + cmsis_nn_dims output_dims; + output_dims.n = batches; + output_dims.h = 1; + output_dims.w = 1; + output_dims.c = output_depth; + + int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims); + auto buffer = std::make_unique<int8_t[]>(buf_size); + assert(buffer != nullptr); + + cmsis_nn_context ctx; + ctx.buf = buffer.get(); + ctx.size = buf_size; + + auto res = + arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims, + filter_data, &bias_dims, bias_data, &output_dims, output_data); + assert(res == ARM_MATH_SUCCESS); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h new file mode 100644 index 000000000..f84742a44 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include <tensorflow/lite/kernels/internal/reference/l2normalization.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h new file mode 100644 index 000000000..38a302fc6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Pool(const tflite::PoolParams ¶ms, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h new file mode 100644 index 000000000..9ccd2224f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h> + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h new file mode 100644 index 000000000..347a97a83 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include <tensorflow/lite/kernels/internal/reference/mul.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template <typename T> +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h new file mode 100644 index 000000000..be5903a0c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include <tensorflow/lite/kernels/internal/reference/neg.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h new file mode 100644 index 000000000..6046789ae --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Quantize(tflite::QuantizationParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template <typename Input, typename Output> +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h new file mode 100644 index 000000000..cc9f0fd54 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h new file mode 100644 index 000000000..f4d5a6ed3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h new file mode 100644 index 000000000..a4a5b2a78 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include <arm_nn_types.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t memory_size = weight_time_shape.Dims(1); + + cmsis_nn_dims input_dims; + input_dims.n = input_shape.Dims(0); + input_dims.h = input_shape.Dims(1); + + cmsis_nn_dims weights_feature_dims; + weights_feature_dims.n = weight_feature_shape.Dims(0); + weights_feature_dims.h = weight_feature_shape.Dims(1); + + cmsis_nn_dims weights_time_dims; + weights_time_dims.n = weight_time_shape.Dims(0); + weights_time_dims.h = weight_time_shape.Dims(1); + + cmsis_nn_dims bias_dims; + bias_dims.n = bias_shape.Dims(0); + + cmsis_nn_dims state_dims; + state_dims.n = batch_size; + state_dims.h = memory_size * num_filters; + + cmsis_nn_dims output_dims; + output_dims.n = output_shape.Dims(0); + output_dims.h = output_shape.Dims(1); + + cmsis_nn_svdf_params svdf_params; + svdf_params.rank = params.rank; + svdf_params.input_offset = input_zp; + svdf_params.output_offset = output_zp; + + svdf_params.input_activation.min = INT16_MIN; + svdf_params.input_activation.max = INT16_MAX; + + svdf_params.output_activation.min = INT8_MIN; + 
svdf_params.output_activation.max = INT8_MAX; + + cmsis_nn_per_tensor_quant_params in_quant_params; + in_quant_params.multiplier = scale_1_a; + in_quant_params.shift = scale_1_b; + + cmsis_nn_per_tensor_quant_params out_quant_params; + out_quant_params.multiplier = scale_2_a; + out_quant_params.shift = scale_2_b; + + cmsis_nn_context scratch_ctx; + scratch_ctx.buf = scratchpad_data; + + cmsis_nn_context scratch_output_ctx; + scratch_output_ctx.buf = output_temp_data; + + arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params, + &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims, + weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data, + &output_dims, output_data); +} +static inline void +FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t input_size = input_shape.Dims(1); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t num_units = num_filters / rank; + const int32_t memory_size = weight_time_shape.Dims(1); + + // Left shift the activation_state. + { + float *new_state_start = activation_state_data; + const float *old_state_start = activation_state_data + 1; + const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. 
+ + // Compute conv1d(inputs, weights_feature). + // The activation_state's rightmost column is used to save current cycle + // activation. This is achieved by starting at state_ptr[memory_size - 1] and + // having the stride equal to memory_size. + + // Perform batched matrix vector multiply operation: + { + const float *matrix = weight_feature_data; + const float *vector = input_data; + float *result = &activation_state_data[memory_size - 1]; + float *result_in_batch = result; + for (int i = 0; i < batch_size; ++i) + { + const float *matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) + { + float dot_prod = 0.0f; + const float *vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) + { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } + } + + tflite::reference_ops::ApplyTimeWeightsBiasAndActivation( + batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data, + params.activation, activation_state_data, scratchpad_data, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw 
std::runtime_error("Hybrid type is not supported for cmsisnn"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h new file mode 100644 index 000000000..6bbda4867 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include <tensorflow/lite/kernels/internal/reference/softmax.h> +#include <arm_nnfunctions.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + // Do nothing for mcu + (void)data; + (void)input_scale; + (void)beta; +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, &input_beta_left_shift); + + params->input_multiplier = input_beta_multiplier; + params->input_left_shift = input_beta_left_shift; + params->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift); +} + +template <typename T> +static inline void Softmax(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + // MARK: At this moment this operation doesn't support on mcu + assert(false && "Softmax NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; +} + +template <> +inline void Softmax<int8_t>(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + const int trailing_dim = input_shape.DimensionsCount() - 1; + const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); + const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); + const int32_t mult = params.input_multiplier; + const int32_t shift = params.input_left_shift; + const int32_t diff_min = params.diff_min; + + 
arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h new file mode 100644 index 000000000..fdddaa929 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams ¶ms, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h new file mode 100644 index 000000000..816b7f663 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h new file mode 100644 index 000000000..ea57578c6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include <tensorflow/lite/kernels/internal/reference/sub.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Sub(const tflite::ArithmeticParams ¶ms, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake new file mode 100644 index 000000000..a68b363d9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake @@ -0,0 +1,65 @@ +macro(initialize_pal) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) + nnas_find_package(CMSISSource EXACT 5.8.0 QUIET) + + if (NOT TensorFlowSource_FOUND) + message(STATUS "Skipping luci-interpreter: TensorFlow not found") + return() + endif () + + if (NOT TensorFlowGEMMLowpSource_FOUND) + message(STATUS "Skipping luci-interpreter: gemmlowp not found") + return() + endif () + + if (NOT TensorFlowEigenSource_FOUND) + message(STATUS "Skipping luci-interpreter: Eigen not found") + return() + endif () + + if (NOT TensorFlowRuySource_FOUND) + message(STATUS "Skipping luci-interpreter: Ruy not found") + return() + endif () + + if (NOT CMSISSource_FOUND) + message(STATUS "Skipping luci-interpreter: CMSISSource not found") + return() + endif () + + set(PAL_INITIALIZED TRUE) +endmacro() + +macro(add_pal_to_target TGT) + target_include_directories(${TGT} PRIVATE 
"${PAL}") + target_include_directories(${TGT} PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}") + target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) + + file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c") + list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc) + add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES}) + set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON) + target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) + + add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN) + target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC + "${CMSISSource_DIR}/CMSIS/NN/Include" + "${CMSISSource_DIR}/CMSIS/DSP/Include" + "${CMSISSource_DIR}/CMSIS/Core/Include") + + target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal) +endmacro() diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst new file mode 100644 index 000000000..8e20559f9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst @@ -0,0 +1,77 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchMatMul) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) 
+REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Gather) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LocalResponseNormalization) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(LogSoftmax) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Mean) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(OneHot) +REGISTER_KERNEL(Pack) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(Pow) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(Relu) +REGISTER_KERNEL(Relu6) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(ReverseV2) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) +REGISTER_KERNEL(Slice) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(Split) +REGISTER_KERNEL(SplitV) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(Unpack) +REGISTER_KERNEL(While) diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h new file mode 100644 index 000000000..21e63296d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h> + +namespace luci_interpreter_pal +{ +template <typename T1, typename T2, typename T3> +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater<T1> cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h new file mode 100644 index 000000000..cce30601f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void AveragePool(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool<int8_t>(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + + tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)input_data_type; + (void)input_shape; + (void)output_shape; + + 
scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h new file mode 100644 index 000000000..3894f2d92 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H +#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H + +#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h> + +namespace luci_interpreter_pal +{ +inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data, + const tflite::RuntimeShape &rhs_shape, const float *rhs_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad, + luci_interpreter::Tensor *rhs_scratchpad, + const tflite::RuntimeShape &lhs_shape, + const tflite::RuntimeShape &rhs_shape) +{ + // Scratchpad for transposed LHS + { + auto lhs_rank = lhs_shape.DimensionsCount(); + luci_interpreter::Shape scratchpad_size(lhs_rank); + for (int i = 0; i < lhs_rank - 2; ++i) + { + scratchpad_size.dim(i) = lhs_shape.Dims(i); + } + scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1); + scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2); + + lhs_scratchpad->resize(scratchpad_size); + } + // Scratchpad for transposed RHS + { + auto rhs_rank = rhs_shape.DimensionsCount(); + luci_interpreter::Shape scratchpad_size(rhs_rank); + for (int i = 0; i < rhs_rank - 2; ++i) + { + scratchpad_size.dim(i) = rhs_shape.Dims(i); + } + scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1); + scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2); + + rhs_scratchpad->resize(scratchpad_size); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h new file mode 100644 index 000000000..3fe2022ed --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 
2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h new file mode 100644 index 000000000..985a15f39 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h> + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) +{ + (void)scratchpad_shape; + if (scratchpad_data) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + const int32_t filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + tflite::RuntimeShape im2col_shape{batches, output_height, output_width, + input_depth * filter_height * filter_width}; + + tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, im2col_shape, + scratchpad_data); + } + else + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + 
tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) +{ + // TODO This should only be done once (although it takes only a few microseconds). + // Also, the user should be able to adjust the number of threads. + auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>(); + gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency())); + + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, gemmlowp_context.get()); +} + +static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + // TODO enable optimized version + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams &params, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + const int32_t 
filter_height = filter_shape.Dims(1); + const int32_t filter_width = filter_shape.Dims(2); + + // Allocate tensor for scratchpad, if needed. + // The checks here should be aligned with the actual implementation. + const bool need_dilated_scratchpad = + params.dilation_height_factor != 1 || params.dilation_width_factor != 1; + const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 || + filter_height != 1 || filter_width != 1; + auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 && + (need_dilated_scratchpad || need_non_dilated_scratchpad); + + if (_need_scratchpad) + { + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3); + const int32_t output_height = output_shape.Dims(1); + const int32_t output_width = output_shape.Dims(2); + + auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type)); + int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height * + filter_width * data_type_size; + luci_interpreter::Shape scratchpad_shape{scratchpad_size}; + scratchpad->resize(scratchpad_shape); + } + else + { + scratchpad->set_allocatable(false); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h new file mode 100644 index 000000000..f9ebfcfb5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h new file mode 100644 index 000000000..c9d1a2948 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel<int8_t>( + const tflite::DepthwiseParams &params, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + 
tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams &params, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)params; + (void)input_data_type; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h new file mode 100644 index 000000000..3af6d0777 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Dequantize(tflite::DequantizationParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h new file mode 100644 index 000000000..cb365ffd0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h new file mode 100644 index 000000000..62970dbf7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void FullyConnected(const tflite::FullyConnectedParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected<int8_t>(const tflite::FullyConnectedParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h new file mode 100644 index 000000000..49ac35f93 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_GATHER_H +#define LUCI_INTERPRETER_PAL_GATHER_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T, typename CoordsT = int32> +static inline void Gather(const tflite::GatherParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_GATHER_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h new file mode 100644 index 000000000..6c663e21f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h new file mode 100644 index 000000000..aac57f2b2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h new file mode 100644 index 000000000..e8209bae6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams &params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h new file mode 100644 index 000000000..54f7f0916 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H +#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void +LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h new file mode 100644 index 000000000..a32e3eec6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H +#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta); +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + // Do nothing for linux + (void)params; + (void)input_scale; + (void)beta; +} + +static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale, + const tflite::RuntimeShape &input_shape, const uint8 *input_data, + const tflite::RuntimeShape &output_shape, uint8 *output_data) +{ + tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h new file mode 100644 index 000000000..a8a9d4abc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} + +template <> +inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape, + const int64_t *input1_data, const tflite::RuntimeShape &input2_shape, + const int64_t *input2_data, const tflite::RuntimeShape &output_shape, + int64_t *output_data) +{ + tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template <typename T> +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h new file mode 100644 index 000000000..797ffee1b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h new file mode 100644 index 000000000..bf1d7954e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Quantize(tflite::QuantizationParams &params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template <typename Input, typename Output> +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h new file mode 100644 index 000000000..b4c715d3e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RELU_H +#define LUCI_INTERPRETER_PAL_RELU_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data); +} + +template <typename T> +static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h new file mode 100644 index 000000000..bf2f91aa5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RELU6_H +#define LUCI_INTERPRETER_PAL_RELU6_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data); +} + +template <typename T> +static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RELU6_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h new file mode 100644 index 000000000..7380081dc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h new file mode 100644 index 000000000..74d19265b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h new file mode 100644 index 000000000..0ffba14f0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include <tensorflow/lite/kernels/internal/reference/svdf.h> + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape, + weight_feature_data, weight_time_shape, weight_time_data, + bias_shape, bias_data, activation_state_data, output_shape, + output_data, scratchpad_data, output_temp_data, scale_1_a, + scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp); +} +static inline void +FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape, + weight_feature_data, weight_time_shape, weight_time_data, + bias_shape, bias_data, scratchpad_data, + activation_state_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const 
luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw std::runtime_error("Hybrid type is not currently supported for linux platform"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h new file mode 100644 index 000000000..640a71684 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SLICE_H +#define LUCI_INTERPRETER_PAL_SLICE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Slice(const tflite::SliceParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SLICE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h new file mode 100644 index 000000000..b197e79d1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta); +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + // Do nothing for linux + (void)params; + (void)input_scale; + (void)beta; +} + +template <typename In, typename Out> +static inline void Softmax(const tflite::SoftmaxParams &params, + const tflite::RuntimeShape &input_shape, const In *input_data, + const tflite::RuntimeShape &output_shape, Out *output_data) +{ + tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h new file mode 100644 index 000000000..5e8de9ba3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams &params, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h new file mode 100644 index 000000000..52d2a5bb1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h new file mode 100644 index 000000000..4d8da72d8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPLIT_H +#define LUCI_INTERPRETER_PAL_SPLIT_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename Scalar> +static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape, + const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes, + Scalar *const *output_data) +{ + tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPLIT_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h new file mode 100644 index 000000000..04080d619 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Sub(const tflite::ArithmeticParams &params, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake new file mode 100644 index 000000000..185700cf9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake @@ -0,0 +1,82 @@ +macro(initialize_pal) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) + + if (NOT TensorFlowSource_FOUND) + message(STATUS "Skipping luci-interpreter: TensorFlow not found") + return() + endif () + + if (NOT TensorFlowGEMMLowpSource_FOUND) + message(STATUS "Skipping luci-interpreter: gemmlowp not found") + return() + endif () + + if (NOT TensorFlowEigenSource_FOUND) + message(STATUS "Skipping luci-interpreter: Eigen not found") + return() + endif () + + if (NOT TensorFlowRuySource_FOUND) + message(STATUS "Skipping luci-interpreter: Ruy not found") + return() + endif () + + find_package(Threads REQUIRED) + + set(PAL_INITIALIZED TRUE) +endmacro() + +macro(add_pal_to_target TGT) + target_include_directories(${TGT} PRIVATE "${PAL}") + target_include_directories(${TGT} SYSTEM PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + 
"${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}") + target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) + + # TODO put it back, I changed my mind. + # instead add sources with visitors in this library + set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) + + if(BUILD_ARM32_NEON) + # NOTE may need to revise this list for version upgrade + set(PAL_SOURCES ${PAL_SOURCES} + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc + ${TensorFlowRuySource_DIR}/ruy/allocator.cc + ${TensorFlowRuySource_DIR}/ruy/block_map.cc + ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc + ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc + ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc + ${TensorFlowRuySource_DIR}/ruy/ctx.cc + ${TensorFlowRuySource_DIR}/ruy/denormal.cc + ${TensorFlowRuySource_DIR}/ruy/frontend.cc + ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc + ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc + ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc + ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc + ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc + ${TensorFlowRuySource_DIR}/ruy/trmul.cc + ${TensorFlowRuySource_DIR}/ruy/tune.cc + ${TensorFlowRuySource_DIR}/ruy/wait.cc + ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc + ) + endif(BUILD_ARM32_NEON) + + add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES}) + set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) + + 
target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal) +endmacro() diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst new file mode 100644 index 000000000..f0df58db3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -0,0 +1,62 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Dequantize) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(ExpandDims) +REGISTER_KERNEL(Fill) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Quantize) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Shape) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(SVDF) +REGISTER_KERNEL(Tanh) 
+REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(While) diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h new file mode 100644 index 000000000..21e63296d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h> + +namespace luci_interpreter_pal +{ +template <typename T1, typename T2, typename T3> +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater<T1> cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h new file mode 100644 index 000000000..cce30601f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H +#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void AveragePool(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data, + const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data) +{ + { + // MARK: At this moment this operation doesn't support + assert(false && "AveragePool NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void AveragePool<int8_t>(const tflite::PoolParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, + int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + + tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const 
tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)input_data_type; + (void)input_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h new file mode 100644 index 000000000..4dd77ffdc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H // must match the #ifndef above + +#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h new file mode 100644 index 000000000..13976877a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/conv.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h> + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &scratchpad_shape, + float *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + uint8 *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, scratchpad_shape, + scratchpad_data, nullptr); +} + +static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &scratchpad_shape, + int8 *scratchpad_data) +{ + 
(void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const luci_interpreter::DataType &input_data_type, + const tflite::ConvParams &params, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) +{ + (void)input_data_type; + (void)params; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h new file mode 100644 index 000000000..8463e571e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h new file mode 100644 index 000000000..c9d1a2948 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "DepthwiseConvPerChannel NYI"); + (void)params; + (void)output_multiplier; + (void)output_shift; + (void)input_shape; + (void)output_data; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + (void)scratchpad_shape; + (void)scratchpad_data; + } +} + +template <> +inline void DepthwiseConvPerChannel<int8_t>( + const tflite::DepthwiseParams &params, const int32_t *output_multiplier, + const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, + const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, output_multiplier, output_shift, input_shape, input_data, filter_shape, 
filter_data, + bias_shape, bias_data, output_shape, output_data); +} + +static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, + const tflite::DepthwiseParams &params, + const luci_interpreter::DataType &input_data_type, + const tflite::RuntimeShape &input_shape, + const tflite::RuntimeShape &filter_shape, + const tflite::RuntimeShape &output_shape) + +{ + (void)params; + (void)input_data_type; + (void)input_shape; + (void)filter_shape; + (void)output_shape; + + scratchpad->set_allocatable(false); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h new file mode 100644 index 000000000..15ff0327b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H +#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ + +template <typename T> +static inline void Dequantize(tflite::DequantizationParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape, + output_data); +} + +static inline void Dequantize(tflite::DequantizationParams &params, + const tflite::RuntimeShape &input_shape, const uint8_t *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h new file mode 100644 index 000000000..4089d0a0c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include <tensorflow/lite/kernels/internal/reference/elu.h> + +namespace luci_interpreter_pal +{ + +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h new file mode 100644 index 000000000..048624d74 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H +#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H + +#include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void FullyConnected(const tflite::FullyConnectedParams &params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &filter_shape, const T *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + { + // MARK: At this moment this operation is not supported + assert(false && "FullyConnected NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)filter_shape; + (void)filter_data; + (void)bias_shape; + (void)bias_data; + (void)output_shape; + (void)output_data; + } +} + +template <> +inline void +FullyConnected<int8_t>(const tflite::FullyConnectedParams &params, + const tflite::RuntimeShape &input_shape, const int8_t *input_data, + const tflite::RuntimeShape &filter_shape, const int8_t *filter_data, + const tflite::RuntimeShape &bias_shape, const int32_t *bias_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data) +{ + tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h new file mode 100644 index 000000000..f84742a44 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include <tensorflow/lite/kernels/internal/reference/l2normalization.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h new file mode 100644 index 000000000..38a302fc6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h new file mode 100644 index 000000000..9ccd2224f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h> + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams &params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h new file mode 100644 index 000000000..347a97a83 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include <tensorflow/lite/kernels/internal/reference/mul.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +template <typename T> +static inline void +BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape, + const T *input1_data, const tflite::RuntimeShape &input2_shape, + const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h new file mode 100644 index 000000000..be5903a0c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include <tensorflow/lite/kernels/internal/reference/neg.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h new file mode 100644 index 000000000..6046789ae --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H +#define LUCI_INTERPRETER_PAL_QUANTIZE_H + +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Quantize(tflite::QuantizationParams &params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data); +} + +template <typename Input, typename Output> +static inline void Requantize(const Input *input_data, int32_t size, + int32_t effective_scale_multiplier, int32_t effective_scale_shift, + int32_t input_zero_point, int32_t output_zero_point, + Output *output_data) +{ + tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier, + effective_scale_shift, input_zero_point, output_zero_point, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h new file mode 100644 index 000000000..cc9f0fd54 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h new file mode 100644 index 000000000..f4d5a6ed3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h new file mode 100644 index 000000000..3bba668fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SVDF_H +#define LUCI_INTERPRETER_PAL_SVDF_H + +#include <tensorflow/lite/kernels/internal/reference/svdf.h> + +namespace luci_interpreter_pal +{ +static inline void +IntegerSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape, + const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape, + const int32_t *bias_data, int16_t *activation_state_data, + const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data, + int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a, + int scale_2_b, int32_t input_zp, int32_t output_zp) +{ + const int n_rank = params.rank; + const int n_batch = input_shape.Dims(0); + const int n_input = input_shape.Dims(1); + const int n_filter = weight_feature_shape.Dims(0); + const int n_unit = n_filter / n_rank; + const int n_memory = weight_time_shape.Dims(1); + + // Left shift the activation_state. + { + int16_t *new_state_start = activation_state_data; + const int16_t *old_state_start = activation_state_data + 1; + const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Feature matmul. 
+ { + const int32_t output_max = std::numeric_limits<int16_t>::max(); + const int32_t output_min = std::numeric_limits<int16_t>::min(); + int16_t *result_in_batch = activation_state_data + (n_memory - 1); + for (int b = 0; b < n_batch; b++) + { + const int8_t *matrix_ptr = weight_feature_data; + for (int r = 0; r < n_filter; r++) + { + int32_t dot_prod = 0; + const int8_t *vector_in_batch = input_data + b * n_input; + for (int c = 0; c < n_input; c++) + { + dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp); + } + dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b); + dot_prod = std::min(std::max(output_min, dot_prod), output_max); + // This assumes state is symmetrically quantized. Otherwise last bit of + // state should be initialized to its zero point and accumulate the + // dot_prod. + // Equivalent as the following: + // result_in_batch = zero point, which happens to be zero. + // result_in_batch += dot_prod_56. + *result_in_batch = dot_prod; + result_in_batch += n_memory; + } + } + } + + // Time. + { + for (int b = 0; b < n_batch; ++b) + { + int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter; + + // Perform batched vector dot product: + const int16_t *vector1_ptr = weight_time_data; + const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter; + + for (int i = 0; i < n_filter; i++) + { + *scratch_ptr_batch = 0; + for (int j = 0; j < n_memory; j++) + { + *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++; + } + scratch_ptr_batch++; + } + } + } + + // Reduce, add bias, rescale, activation. + { + // Add bias. 
+ if (bias_data) + { + // Vector batch assign: + for (int i = 0; i < n_batch; ++i) + { + int32_t *output_ptr = output_temp_data + i * n_unit; + const int32_t *bias_ptr = bias_data; + for (int j = 0; j < n_unit; ++j) + { + *output_ptr++ = *bias_ptr++; + } + } + } + else + { + int32_t *output_ptr = output_temp_data; + for (int i = 0; i < n_batch * n_unit; ++i) + { + *output_ptr++ = 0; + } + } + + // Reduce. + for (int b = 0; b < n_batch; ++b) + { + int32_t *output_temp_ptr = output_temp_data + b * n_unit; + int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter; + + // Reduction sum vector + for (int i = 0; i < n_unit; ++i) + { + for (int j = 0; j < n_rank; ++j) + { + output_temp_ptr[i] += *scratch_ptr_batch++; + } + } + } + + // Rescale. + const int32_t output_max = std::numeric_limits<int8_t>::max(); + const int32_t output_min = std::numeric_limits<int8_t>::min(); + for (int i = 0; i < n_batch * n_unit; ++i) + { + int32_t x1 = output_temp_data[i]; + int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b); + int32_t x3 = x2 + output_zp; + int32_t x4 = std::min(std::max(output_min, x3), output_max); + output_data[i] = static_cast<int8_t>(x4); + } + } +} +static inline void +FloatSVDF(const TfLiteSVDFParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &weight_feature_shape, + const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape, + const float *weight_time_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, float *scratchpad_data, float *activation_state_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + const int32_t rank = params.rank; + const int32_t batch_size = input_shape.Dims(0); + const int32_t input_size = input_shape.Dims(1); + const int32_t num_filters = weight_feature_shape.Dims(0); + const int32_t num_units = num_filters / rank; + const int32_t memory_size = weight_time_shape.Dims(1); + + // Left shift the 
activation_state. + { + float *new_state_start = activation_state_data; + const float *old_state_start = activation_state_data + 1; + const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size; + while (old_state_start != old_state_end) + { + *new_state_start++ = *old_state_start++; + } + } + + // Note: no need to clear the latest activation, matmul is not accumulative. + + // Compute conv1d(inputs, weights_feature). + // The activation_state's rightmost column is used to save current cycle + // activation. This is achieved by starting at state_ptr[memory_size - 1] and + // having the stride equal to memory_size. + + // Perform batched matrix vector multiply operation: + { + const float *matrix = weight_feature_data; + const float *vector = input_data; + float *result = &activation_state_data[memory_size - 1]; + float *result_in_batch = result; + for (int i = 0; i < batch_size; ++i) + { + const float *matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) + { + float dot_prod = 0.0f; + const float *vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) + { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } + } + + tflite::reference_ops::ApplyTimeWeightsBiasAndActivation( + batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data, + params.activation, activation_state_data, scratchpad_data, output_data); +} + +static inline void SetupScratchpadTensor( + const luci_interpreter::DataType &input_data_type, + const luci_interpreter::DataType &weight_feature_data_type, + luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2, + luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4, + luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6, + const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape, + 
const int32_t batch_size, const int32_t num_filters, const int32_t num_units) +{ + + if (input_data_type == loco::DataType::FLOAT32 && + (weight_feature_data_type == loco::DataType::S8 || + weight_feature_data_type == loco::DataType::U8)) + { + (void)input_shape; + (void)weight_time_shape; + (void)scratchpad_3; + (void)scratchpad_4; + (void)scratchpad_5; + (void)scratchpad_6; + + throw std::runtime_error("Hybrid type is not currently supported for mcu platform"); + } + + // Resize scratchpad_1 tensor + scratchpad_1->resize({batch_size, num_filters}); + + if (input_data_type == loco::DataType::S8) + { + // Resize scratchpad_2 for full_integer op + scratchpad_2->resize({batch_size, num_units}); + } +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SVDF_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h new file mode 100644 index 000000000..9838b542d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include <tensorflow/lite/kernels/internal/reference/softmax.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + // Do nothing for mcu + (void)data; + (void)input_scale; + (void)beta; +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, &input_beta_left_shift); + + params->input_multiplier = input_beta_multiplier; + params->input_left_shift = input_beta_left_shift; + params->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift); +} + +template <typename T> +static inline void Softmax(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + // MARK: At this moment this operation doesn't support on mcu + assert(false && "Softmax NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h new file mode 100644 index 000000000..fdddaa929 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams ¶ms, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h new file mode 100644 index 000000000..816b7f663 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h new file mode 100644 index 000000000..ea57578c6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include <tensorflow/lite/kernels/internal/reference/sub.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Sub(const tflite::ArithmeticParams ¶ms, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake new file mode 100644 index 000000000..907d51de6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake @@ -0,0 +1,56 @@ +macro(initialize_pal) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) + + if (NOT TensorFlowSource_FOUND) + message(STATUS "Skipping luci-interpreter: TensorFlow not found") + return() + endif () + + if (NOT TensorFlowGEMMLowpSource_FOUND) + message(STATUS "Skipping luci-interpreter: gemmlowp not found") + return() + endif () + + if (NOT TensorFlowEigenSource_FOUND) + message(STATUS "Skipping luci-interpreter: Eigen not found") + return() + endif () + + if (NOT TensorFlowRuySource_FOUND) + message(STATUS "Skipping luci-interpreter: Ruy not found") + return() + endif () + #find_package(Threads REQUIRED) + + set(PAL_INITIALIZED TRUE) +endmacro() + +macro(add_pal_to_target TGT) + target_include_directories(${TGT} PRIVATE "${PAL}") + target_include_directories(${TGT} 
PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}") + target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR}) + + # TODO put it back, I changed my mind. + # instead add sources with visitors in this library + set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc) + add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES}) + set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(luci_interpreter_mcu_pal PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) + + target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal) + #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal) +endmacro() diff --git a/compiler/luci-micro/luci-interpreter/requires.cmake b/compiler/luci-micro/luci-interpreter/requires.cmake new file mode 100644 index 000000000..f411f387a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/requires.cmake @@ -0,0 +1 @@ +require(luci) diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp new file mode 100644 index 000000000..6ad1f320c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/BuddyMemoryManager.h" + +namespace luci_interpreter +{ + +BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize) +{ + int32_t p = lowerLog2(memSize); + + // We assume that the requested size of memory does not exceed 4 GB + assert(p < 32); + memSize = 1 << p; + + _start_block = reinterpret_cast<Block *>(memory_start); + _start_block->size = memSize - sizeof(Block); + _start_block->is_free = true; + _start_block->self = _start_block; + _num_blocks = 0; + _size = _start_block->size; + + for (auto &_free_block : _free_blocks) + _free_block = nullptr; + + addToBlocks(_start_block, p); +} + +void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + const size_t element_size = getDataTypeSize(tensor.element_type()); + const int32_t num_elements = tensor.shape().num_elements(); + auto size = num_elements * element_size; + auto footprint = size + sizeof(Block); + auto l = (footprint & (footprint - 1)) == 0 + ? 
lowerLog2(footprint) + : lowerLog2(footprint) + 1; // check footprint is pow_of_2 + + while (l < 32 && !_free_blocks[l]) + l++; + + assert(l < 32); + + Block *tmp; + tmp = _free_blocks[l]; + removeFromBlocks(tmp, l); + + while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block)) + { + divideBlock(tmp, l); + l--; + } + + tmp->is_free = false; + tmp->self = tmp; + _num_blocks++; + + auto *data = (uint8_t *)(tmp + 1); + tensor.set_data_buffer(data); +} + +void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + auto data = tensor.data<void>(); + auto *tmp = (Block *)((uint8_t *)data - sizeof(Block)); + + assert(tmp->self == tmp); + + tmp->is_free = true; + addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block))); + + while (tmp) + if (tmp->size == _size) + break; + else + tmp = mergeBlock(tmp); + + _num_blocks--; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp new file mode 100644 index 000000000..29fb767b7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/BuddyMemoryManager.h" +#include <gtest/gtest.h> + +namespace luci_interpreter +{ +namespace +{ + +using namespace testing; + +TEST(BuddyMemoryManager, basic) +{ + auto mem_pool = std::make_unique<uint8_t[]>(200); + auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130); + Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor"); + + buddy_memory_manager->allocate_memory(first_tensor); + + uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + + first_tensor.writeData(data_1, 8); + uint8_t array_1[8]; + first_tensor.readData(array_1, 8); + for (int i = 0; i < 8; i++) + { + EXPECT_EQ(data_1[i], array_1[i]); + } + + Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor"); + buddy_memory_manager->allocate_memory(second_tensor); + + uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}}; + second_tensor.writeData(data_2, 10); + + uint8_t array_2[2][5]; + second_tensor.readData(array_2, 10); + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 5; j++) + { + EXPECT_EQ(data_2[i][j], array_2[i][j]); + } + } + + buddy_memory_manager->release_memory(first_tensor); + EXPECT_EQ(first_tensor.data<void>(), nullptr); + + buddy_memory_manager->release_memory(second_tensor); + EXPECT_EQ(second_tensor.data<void>(), nullptr); +} + +} // namespace +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt new file mode 100644 index 000000000..997b75a84 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt @@ -0,0 +1,61 @@ +include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake") + +initialize_pal() + +if (NOT PAL_INITIALIZED) + message("PAL Failed to initialize, skip luci-interpreter") + return() +endif() + +message(STATUS "LUCI INTERPRETER BEGIN") + +set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}") 
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}") + +add_subdirectory(core) +message(STATUS "LUCI INTERPRETER CORE") +add_subdirectory(kernels) +message(STATUS "LUCI INTERPRETER KERNELS") +add_subdirectory(loader) +message(STATUS "LUCI INTERPRETER LOADER") +add_subdirectory(import) +message(STATUS "LUCI INTERPRETER IMPORT") + +message(STATUS "LUCI INTERPTER INITALIZED") + +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h" + Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp) + +if (NOT LUCI_INTERPRETER_STATIC) + add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES}) +else () + add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES}) +endif () + +set(TEST_SOURCES BuddyMemoryManager.test.cpp) + +target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_BINARY} + PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE} + PRIVATE nncc_common) + +install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib) +install(DIRECTORY include/ DESTINATION include + FILES_MATCHING PATTERN "*.h") + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(buddy_manager_test ${TEST_SOURCES}) 
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY}) diff --git a/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp new file mode 100644 index 000000000..8cf272efd --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/Interpreter.h" +#include "luci_interpreter/SimpleMemoryManager.h" + +#include "loader/ModuleLoader.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace +{ + +class EventNotifierImpl final : public EventNotifier +{ +public: + EventNotifierImpl(const RuntimeToIR &runtime_to_ir, + const std::vector<ExecutionObserver *> &observers) + : _runtime_to_ir(runtime_to_ir), _observers(observers) + { + } + + void postTensorWrite(const Tensor *tensor) override + { + assert(tensor != nullptr); + for (const auto &observer : _observers) + { + observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor); + } + } + + void preOperatorExecute(const Kernel *kernel) override + { + assert(kernel != nullptr); + for (const auto &observer : _observers) + { + observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel)); + } + } + + void postOperatorExecute(const Kernel *kernel) override + { + assert(kernel != nullptr); + for (const auto &observer : 
_observers) + { + observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel)); + } + } + +private: + const RuntimeToIR &_runtime_to_ir; + const std::vector<ExecutionObserver *> &_observers; +}; + +} // namespace + +Interpreter::Interpreter(const luci::Module *module) +{ + _runtime_to_ir = std::make_unique<RuntimeToIR>(); + _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); + _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); + + _default_memory_manager = std::make_unique<SimpleMemoryManager>(); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + _default_memory_manager.get()); + loader.load(); +} + +Interpreter::Interpreter(const luci::Module *module, + luci_interpreter::IMemoryManager *memory_manager) +{ + assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead"); + + _runtime_to_ir = std::make_unique<RuntimeToIR>(); + _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); + _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + memory_manager); + loader.load(); +} + +Interpreter::~Interpreter() = default; + +void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data, + size_t data_size) +{ + Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()]; + if (tensor == nullptr) + { + const std::string &name = input_node->name(); + throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\"."); + } + if (data != nullptr) + tensor->writeData(data, data_size); +} + +void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data, + size_t data_size) +{ + Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()]; + if (tensor == nullptr) + { + const std::string &name = output_node->name(); + throw 
std::runtime_error("Cannot find tensor for output node named \"" + name + "\"."); + } + if (data != nullptr) + tensor->readData(data, data_size); +} + +void Interpreter::interpret() { _runtime_module->execute(); } + +void Interpreter::attachObserver(ExecutionObserver *observer) +{ + if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend()) + throw std::runtime_error("Observer is already attached."); + _observers.push_back(observer); +} + +ExecutionObserver::~ExecutionObserver() = default; + +void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {} + +void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {} + +void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp new file mode 100644 index 000000000..230e39896 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/SimpleMemoryManager.h" + +namespace luci_interpreter +{ + +void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + tensor.set_data_buffer(data); +} + +void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_data_allocated()) + { + tensor.set_data_buffer(nullptr); + return; + } + auto data = tensor.data<uint8_t>(); + delete[] data; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp new file mode 100644 index 000000000..73a819919 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/StaticMemoryManager.h" + +namespace luci_interpreter +{ + +void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + int32_t offset = tensor.get_offset(); + assert(offset >= 0); + auto tensor_ptr = _buffer_ptr + offset; + tensor.set_data_buffer(tensor_ptr); +} + +void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp new file mode 100644 index 000000000..3beeee55c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ + +void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + allocations.push_back(data); + tensor.set_data_buffer(data); +} + +void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt new file mode 100644 index 000000000..c2471e01c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt @@ -0,0 +1,19 @@ +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h" + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h" + EventNotifier.h + Kernel.h + KernelParams.h + RuntimeGraph.h + RuntimeGraph.cpp + RuntimeModule.h + Tensor.cpp) + +add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang) +target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common) diff --git a/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h new file mode 100644 index 000000000..5c4fbd3be --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H +#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H + +namespace luci_interpreter +{ + +// Used at execution stage to tell the interpreter that the runtime state has changed in some way. +class EventNotifier +{ +public: + virtual ~EventNotifier() = default; + + virtual void postTensorWrite(const Tensor *tensor) = 0; + virtual void preOperatorExecute(const Kernel *kernel) = 0; + virtual void postOperatorExecute(const Kernel *kernel) = 0; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/Kernel.h b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h new file mode 100644 index 000000000..a7c4a4218 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_KERNEL_H +#define LUCI_INTERPRETER_CORE_KERNEL_H + +#include "luci_interpreter/core/Tensor.h" + +#include <vector> + +namespace luci_interpreter +{ + +// Base class for all kernels. +class Kernel +{ +protected: + Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs) + : _inputs(std::move(inputs)), _outputs(std::move(outputs)) + { + } + +public: + virtual ~Kernel() = default; + + const std::vector<const Tensor *> &getInputTensors() const { return _inputs; } + const std::vector<Tensor *> &getOutputTensors() const { return _outputs; } + + // Configures the kernel. + // This function is currently called once for each kernel during interpreter construction, + // which makes it a convenient place for preparing (resizing) output tensors. + virtual void configure() = 0; + + // Executes the kernel. + virtual void execute() const = 0; + +protected: + // NOTE Prefer not to use these in derived classes. + const std::vector<const Tensor *> _inputs; + const std::vector<Tensor *> _outputs; +}; + +// Base class for kernels with parameters. 
+template <typename Params> class KernelWithParams : public Kernel +{ +protected: + KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, + const Params &params) + : Kernel(std::move(inputs), std::move(outputs)), _params(params) + { + } + +public: + const Params &params() const { return _params; } + +protected: + const Params _params; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_KERNEL_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h new file mode 100644 index 000000000..6c0220c62 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H +#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H + +#include <luci/IR/AttrPadding.h> +#include <luci/IR/AttrFusedActFunc.h> +#include <luci/IR/AttrMirrorPadMode.h> +#include <luci_interpreter/core/DataType.h> + +#include <cstdint> +#include <vector> + +namespace luci_interpreter +{ + +// Inject commonly used types into `luci_interpreter` namespace for convenience. 
+using Activation = luci::FusedActFunc; +using Padding = luci::Padding; +using MirrorPadMode = luci::MirrorPadMode; + +struct AddParams +{ + Activation activation; +}; + +struct ArgMaxParams +{ + DataType output_type; +}; + +struct BatchMatMulParams +{ + bool adj_x; + bool adj_y; +}; + +struct ConcatenationParams +{ + int axis; + Activation activation; +}; + +struct Conv2DParams +{ + Padding padding; + int32_t stride_height; + int32_t stride_width; + int32_t dilation_height_factor; + int32_t dilation_width_factor; + Activation activation; +}; + +struct DepthToSpaceParams +{ + int block_size; +}; + +struct DepthwiseConv2DParams +{ + Padding padding; + int32_t depth_multiplier; // TODO Remove, as it can be calculated. + int32_t stride_height; + int32_t stride_width; + int32_t dilation_height_factor; + int32_t dilation_width_factor; + Activation activation; +}; + +struct DivParams +{ + Activation activation; +}; + +struct FullyConnectedParams +{ + Activation activation; + bool keep_num_dims = false; +}; + +struct GatherParams +{ + int32_t axis; + int32_t batch_dims; +}; + +struct InstanceNormParams +{ + float epsilon; + Activation activation; +}; + +struct L2NormParams +{ + Activation activation; +}; + +struct LeakyReluParams +{ + float alpha; +}; + +struct LocalResponseNormalizationParams +{ + int32_t radius; + float bias; + float alpha; + float beta; +}; + +struct MirrorPadParams +{ + MirrorPadMode mode; +}; + +struct MulParams +{ + Activation activation; +}; + +struct OneHotParams +{ + int32_t axis; +}; + +struct PackParams +{ + int32_t values_count; + int32_t axis; +}; + +struct Pool2DParams +{ + Padding padding; + int32_t filter_height; + int32_t filter_width; + int32_t stride_height; + int32_t stride_width; + Activation activation; +}; + +struct ReducerParams +{ + bool keep_dims; +}; + +struct ResizeBilinearParams +{ + bool align_corners; + bool half_pixel_centers; +}; + +struct ResizeNearestNeighborParams +{ + bool align_corners; + bool half_pixel_centers; +}; 
+ +struct ShapeParams +{ + loco::DataType out_type; +}; + +struct SubParams +{ + Activation activation; +}; + +struct SVDFParams +{ + bool asymmetric_quantize_inputs; + int32_t svdf_rank; + Activation activation; +}; + +struct SpaceToDepthParams +{ + int block_size; +}; + +struct SoftmaxParams +{ + float beta; +}; + +struct StridedSliceParams +{ + int32_t begin_mask; + int32_t end_mask; + int32_t ellipsis_mask; + int32_t new_axis_mask; + int32_t shrink_axis_mask; +}; + +struct SqueezeParams +{ + std::vector<int32_t> squeeze_dims; +}; + +struct TransposeConvParams +{ + Padding padding; + int32_t stride_height; + int32_t stride_width; +}; + +struct UnpackParams +{ + int axis; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp new file mode 100644 index 000000000..c2f8d2ea8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "core/RuntimeGraph.h" + +#include "core/RuntimeModule.h" + +#include <algorithm> +#include <unordered_map> + +namespace luci_interpreter +{ + +class RuntimeGraph::TensorAllocPlan +{ + std::vector<std::vector<Tensor *>> _alloc_plan; + std::vector<std::vector<Tensor *>> _dealloc_plan; + bool _valid = false; + IMemoryManager *_memory_manager; + +public: + explicit TensorAllocPlan(IMemoryManager *memory_manager); + void invalidate() { _valid = false; } + bool isValid() const { return _valid; } + void build(const RuntimeGraph &graph); + void allocate(size_t kernel_index) const; + void deallocate(size_t kernel_index) const; +}; + +RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager) + : _memory_manager(memory_manager) +{ +} + +void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph) +{ + invalidate(); + using Lifetime = std::pair<size_t, size_t>; + std::unordered_map<Tensor *, Lifetime> lifetimes; + const size_t num_kernels = graph._kernels.size(); + for (size_t index = 0; index < num_kernels; ++index) + { + const auto &kernel = graph._kernels[index]; + for (const Tensor *tensor : kernel->getInputTensors()) + { + auto nc_tensor = const_cast<Tensor *>(tensor); + if (lifetimes.count(nc_tensor) > 0) + lifetimes.at(nc_tensor).second = index; + } + for (Tensor *tensor : kernel->getOutputTensors()) + { + assert(lifetimes.count(tensor) == 0); + lifetimes[tensor] = Lifetime(index, index); + } + } + for (const Tensor *tensor : graph.getOutputTensors()) + { + auto nc_tensor = const_cast<Tensor *>(tensor); + if (lifetimes.count(nc_tensor) > 0) + lifetimes.at(nc_tensor).second = num_kernels; + } + _alloc_plan.assign(num_kernels, std::vector<Tensor *>()); + _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>()); + for (const auto &item : lifetimes) + { + _alloc_plan[item.second.first].push_back(item.first); + _dealloc_plan[item.second.second].push_back(item.first); + } + _valid = true; +} + +void 
RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const +{ + assert(_valid && kernel_index < _alloc_plan.size()); + for (Tensor *tensor : _alloc_plan[kernel_index]) + { + _memory_manager->allocate_memory(*tensor); + } +} + +void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const +{ + assert(_valid && kernel_index < _dealloc_plan.size()); + for (Tensor *tensor : _dealloc_plan[kernel_index]) + { + _memory_manager->release_memory(*tensor); + } +} + +RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager) + : _owning_module(owning_module), _memory_manager(memory_manager), + _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager)) +{ +} + +RuntimeGraph::~RuntimeGraph() +{ + for (auto &tensor : _tensors) + { + if (tensor->is_data_allocated()) + _memory_manager->release_memory(*tensor); + } +} + +Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor) +{ + assert(tensor != nullptr); + _tensors.push_back(std::move(tensor)); + return _tensors.back().get(); +} + +void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors) +{ + assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(), + [](Tensor *tensor) { return tensor != nullptr; })); + _input_tensors = input_tensors; +} + +void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors) +{ + assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(), + [](Tensor *tensor) { return tensor != nullptr; })); + _output_tensors = output_tensors; +} + +void RuntimeGraph::configureAllocations(Tensor *tensor) +{ + _memory_manager->allocate_memory(*tensor); +} + +void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel) +{ + assert(kernel != nullptr); + _kernels.push_back(std::move(kernel)); + _tensor_alloc_plan->invalidate(); +} + +void RuntimeGraph::execute() const +{ + if (!_tensor_alloc_plan->isValid()) + _tensor_alloc_plan->build(*this); + + EventNotifier *event_notifier = 
_owning_module->getEventNotifier(); + + // Notify the observers that the input tensors have changed. + if (event_notifier != nullptr) + { + for (const Tensor *input_tensor : getInputTensors()) + { + if (input_tensor->is_observable()) + event_notifier->postTensorWrite(input_tensor); + } + } + + for (size_t index = 0; index < _kernels.size(); ++index) + { + const auto &kernel = _kernels[index]; + if (event_notifier != nullptr) + { + event_notifier->preOperatorExecute(kernel.get()); + } + + // TODO The `configure` method should only be called if the outputs of an operator need to be + // resized. + kernel->configure(); + + // Preallocate outputs in advance instead of relying on automatic allocation + _tensor_alloc_plan->allocate(index); + + kernel->execute(); + + if (event_notifier != nullptr) + { + event_notifier->postOperatorExecute(kernel.get()); + } + + for (const Tensor *tensor : kernel->getOutputTensors()) + { + if (event_notifier != nullptr && tensor->is_observable()) + { + event_notifier->postTensorWrite(tensor); + } + } + _tensor_alloc_plan->deallocate(index); + } +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h new file mode 100644 index 000000000..8184e249d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H +#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H + +#include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" +#include "core/Kernel.h" + +#include <memory> +#include <vector> + +namespace luci_interpreter +{ + +class RuntimeModule; + +class RuntimeGraph +{ +private: + class TensorAllocPlan; + friend class TensorAllocPlan; + +public: + explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager); + ~RuntimeGraph(); + + Tensor *addTensor(std::unique_ptr<Tensor> &&tensor); + + void setInputTensors(const std::vector<Tensor *> &input_tensors); + void setOutputTensors(const std::vector<Tensor *> &output_tensors); + + void configureAllocations(Tensor *tensor); + + const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; } + const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; } + + void addKernel(std::unique_ptr<Kernel> &&kernel); + + void execute() const; + +private: + IMemoryManager *_memory_manager; + RuntimeModule *_owning_module; + std::vector<std::unique_ptr<Tensor>> _tensors; + std::vector<Tensor *> _input_tensors; + std::vector<Tensor *> _output_tensors; + + // Kernels in execution order. + std::vector<std::unique_ptr<Kernel>> _kernels; + // Tensors that are not used anymore after given op + std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h new file mode 100644 index 000000000..78873b0ec --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H +#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H + +#include "core/RuntimeGraph.h" +#include "core/EventNotifier.h" +#include "luci_interpreter/MemoryManager.h" + +#include <memory> +#include <vector> + +namespace luci_interpreter +{ + +class RuntimeModule +{ +public: + explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {} + + EventNotifier *getEventNotifier() const { return _event_notifier; } + + RuntimeGraph *addGraph(IMemoryManager *memory_manager) + { + _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager)); + return _graphs.back().get(); + } + + const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); } + const std::vector<Tensor *> &getOutputTensors() const + { + return getMainGraph()->getOutputTensors(); + } + + void execute() const { getMainGraph()->execute(); } + +private: + RuntimeGraph *getMainGraph() const { return _graphs[0].get(); } + + EventNotifier *const _event_notifier; + std::vector<std::unique_ptr<RuntimeGraph>> _graphs; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H diff --git a/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp new file mode 100644 index 000000000..3c3c5ffff --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/core/Tensor.h" + +#include <cstring> +#include <stdexcept> + +namespace luci_interpreter +{ + +Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization, + std::string name) + : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)), + _name(std::move(name)), _data_allocated(false) +{ +} + +void Tensor::readData(void *data_ptr, size_t data_size) const +{ + const size_t element_size = getDataTypeSize(element_type()); + const int32_t num_elements = shape().num_elements(); + if (data_size != num_elements * element_size) + { + throw std::invalid_argument("Invalid data size."); + } + assert(data_ptr != nullptr); + std::memcpy(data_ptr, data<void>(), data_size); +} + +void Tensor::writeData(const void *data_ptr, size_t data_size) +{ + const size_t element_size = getDataTypeSize(element_type()); + const int32_t num_elements = shape().num_elements(); + if (data_size != num_elements * element_size) + { + throw std::invalid_argument("Invalid data size."); + } + assert(data_ptr != nullptr); + std::memcpy(data<void>(), data_ptr, data_size); +} + +void Tensor::resize(const Shape &new_shape) { _shape = new_shape; } + +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt new file mode 100644 index 000000000..dd9733f92 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt @@ -0,0 +1,15 @@ +set(SOURCES + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h" + GraphBuilderRegistry.cpp) + +# include specific builders +file(GLOB_RECURSE NODES "Nodes/*") +list(APPEND SOURCES ${NODES}) + +add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT NNCC_LIBRARY_NO_PIC) + +target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import) diff --git a/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp new file mode 100644 index 000000000..a33bca6a4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "luci_interpreter/GraphBuilderRegistry.h" +#include "Nodes/CircleReferencingConst.h" + +namespace luci_interpreter +{ + +std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying() +{ + auto builder = std::make_unique<luci::GraphBuilderRegistry>(); + { + // redefine NodeBuilder of BUFFER type + builder->add(std::make_unique<CircleReferencingConstNodeBuilder>()); + } + + return builder; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp new file mode 100644 index 000000000..14e90f240 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleReferencingConst.h" + +#include <vector> + +namespace +{ + +// helper struct which describes data loaded to custom_options of CircleReferencingConst node +struct ConstDataReference +{ + const uint8_t *data = nullptr; + uint32_t size = 0; +}; + +} // namespace + +namespace luci_interpreter +{ +using namespace luci; + +CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index, + GraphBuilderContext *context) const +{ + assert(tensor_index >= 0); + + const auto graph = context->graph(); + const auto reader = context->reader(); + const auto tensors = reader->tensors(); + auto const const_tensor = tensors[tensor_index]; + assert(const_tensor != nullptr); + if (const_tensor->is_variable()) + { + // Create CircleVariable for variable + return nullptr; + } + + auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data()); + auto const const_dims = wrap(const_tensor->shape()); // in NHWC + if (const_dims.empty() && buffer.empty()) + { + // unknown shape tensor and scalar tensor + return nullptr; + } + + // if tensor_index is used as output to some other operator, this is not a constant + auto tensoroutputs = context->tensoroutputs(); + if (tensoroutputs->find(tensor_index)) + { + // other operator output tensor + return nullptr; + } + + uint32_t num_elements = 1; + for (uint32_t r = 0; r < const_dims.size(); ++r) + { + num_elements = num_elements * const_dims[r]; + } + + if (buffer.empty() && num_elements > 0) + { + // normal empty tensor + return nullptr; + } + + // create CircleReferencingConst + auto custom_node = graph->nodes()->create<CircleCustom>(0, 1); + { + custom_node->custom_code("CircleReferencingConst"); + + copy_tensor_attributes(const_tensor, custom_node); + custom_node->shape_status(luci::ShapeStatus::VALID); + + // custom options stores size of buffer and pointer's value to buffer's data + { + std::vector<uint8_t> custom_options(sizeof(ConstDataReference)); + { + auto &const_data_ref = 
*reinterpret_cast<ConstDataReference *>(custom_options.data()); + const_data_ref = {buffer.data(), buffer.size()}; + } + custom_node->custom_options(custom_options); + } + } + + // Output of CircleCustom node presented with CircleConstNode + auto out_node = graph->nodes()->create<CircleCustomOut>(); + { + out_node->index(0); + out_node->input(custom_node); + + copy_tensor_attributes(const_tensor, out_node); + out_node->shape_status(luci::ShapeStatus::VALID); + } + + return out_node; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h new file mode 100644 index 000000000..ed8f95124 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ +#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ + +#include <luci/Import/NodeBuilder.h> + +#include <luci/IR/Nodes/CircleConst.h> + +namespace luci_interpreter +{ +using namespace luci; + +/** + * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer. 
+ */ +class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER> +{ +public: + CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final; +}; + +} // namespace luci_interpreter + +#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__ diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp new file mode 100644 index 000000000..d7bf3084f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Add.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/add.h> +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params) + : KernelWithParams<AddParams>({input1, input2}, {output}, params) +{ +} + +void Add::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + if (input1()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1); + LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && + output()->zero_point() == 0); + } + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Add::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Add::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastAdd4DSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } +
else + { + tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Add::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastAdd4DSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +void Add::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const int left_shift = 20; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); +
quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + params.left_shift = left_shift; + // The kernel expects inputs' zero points to be negated. + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input1_multiplier = input1_multiplier; + params.input1_shift = input1_shift; + params.input2_offset = -input2()->zero_point(); // Note the '-'. + params.input2_multiplier = input2_multiplier; + params.input2_shift = input2_shift; + params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastAdd4DSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()), + getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +void Add::evalQuantizedS16() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + constexpr int left_shift = 12; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale /
twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + auto fn = [input1_multiplier, input1_shift, // + input2_multiplier, input2_shift, // + output_multiplier, output_shift, // + activation_min, activation_max](int16_t input1_val, int16_t input2_val) { + const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift; + const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift; + const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, input1_multiplier, input1_shift); + const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, input2_multiplier, input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, output_multiplier, output_shift); + const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output)); + return static_cast<int16_t>(clamped_output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()), + getTensorShape(input2()), getTensorData<int16_t>(input2()), + getTensorShape(output()), 
getTensorData<int16_t>(output()), fn); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.h b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h new file mode 100644 index 000000000..91d95b6af --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_ADD_H +#define LUCI_INTERPRETER_KERNELS_ADD_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Add : public KernelWithParams<AddParams> +{ +public: + Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + void evalQuantizedS16() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ADD_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp new file mode 100644 index 000000000..b8b1c3089 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "kernels/Add.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class AddTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +// for quantized Add, the error shouldn't exceed step +float GetTolerance(float min, float max) +{ + float kQuantizedStep = (max - min) / 255.0; + return kQuantizedStep; +} + +TEST_F(AddTest, Uint8) +{ + std::initializer_list<int32_t> base_shape = {2, 3, 1, 2}; + std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::initializer_list<int32_t> test_shapes[] = { + {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::initializer_list<int32_t> output_shapes[] = { + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + std::vector<std::vector<float>> output_data = { + {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f, + -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f}, + {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f}, + {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f, + -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f}, + {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}}; + float kQuantizedTolerance = GetTolerance(-3.f, 3.f); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); + for (int i = 0; i < output_data.size(); i++) + { + Tensor input1_tensor = 
makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } + // Re-run with exchanged inputs. + for (int i = 0; i < output_data.size(); i++) + { + Tensor input1_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST_F(AddTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<float>> test_outputs = { + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 
0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } +} + +template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<dtype>> test_outputs = { + {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1, + 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7}, + {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7}, + {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0, + 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7}, + {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}}; + std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + 
kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(AddTest, SInt32) +{ + CheckInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt64) +{ + CheckInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(AddTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<int32_t>> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<std::vector<float>> ref_outputs = { + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 
1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } + // Re-run with exchanged inputs and different scales. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } +} + +TEST_F(AddTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AddTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(AddTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + 
Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(AddTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + AddParams params{}; + params.activation = Activation::NONE; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp new file mode 100644 index 000000000..6561a1783 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ArgMax.h" +#include "kernels/Utils.h" +#include "PALArgMax.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params) + : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params) +{ +} + +void ArgMax::configure() +{ + assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64); + assert(input()->shape().num_dims() >= 1); + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + Shape output_shape(num_dims - 1); + + // If axis value is negative, then update by adding input_shape's num_dims. + // If updated value also negative, then assert. + assert(axis()->shape().num_elements() == 1); + int axis_value = getTensorData<int32_t>(axis())[0]; + if (axis_value < 0) + axis_value = axis_value + num_dims; + assert(axis_value >= 0); + + int j = 0; + for (int i = 0; i < num_dims; i++) + { + if (i == axis_value) + continue; + output_shape.dim(j++) = input_shape.dim(i); + } + + assert(output()->element_type() == _params.output_type); + + output()->resize(output_shape); +} + +void ArgMax::execute() const +{ + +#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ + luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \ + getTensorData<axis_type>(axis()), getTensorShape(output()), \ + getTensorData<output_type>(output()), std::greater<data_type>()) + if (axis()->element_type() == DataType::S32) + { + switch (_params.output_type) + { + case DataType::S32: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int32_t, int32_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + case DataType::S64: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float,
int32_t, int64_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + default: + throw std::runtime_error("Unsupported output type."); + } + } + else + { + switch (_params.output_type) + { + case DataType::S32: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int64_t, int32_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + case DataType::S64: + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_ARG_MAX(float, int64_t, int64_t); + break; + case DataType::U8: + TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t); + break; + default: + throw std::runtime_error("Unsupported input type."); + } + break; + default: + throw std::runtime_error("Unsupported output type."); + } + } +#undef TF_LITE_ARG_MAX +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h new file mode 100644 index 000000000..c851b5891 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef LUCI_INTERPRETER_KERNELS_ARGMAX_H
#define LUCI_INTERPRETER_KERNELS_ARGMAX_H

#include "core/Kernel.h"
#include "core/KernelParams.h"

namespace luci_interpreter
{
namespace kernels
{

// ArgMax kernel: takes an input tensor and an axis tensor and produces one
// output tensor, parameterized by ArgMaxParams.
class ArgMax : public KernelWithParams<ArgMaxParams>
{
public:
  // input and axis become kernel inputs 0 and 1; output becomes kernel output 0.
  ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params);

  // Accessors for the tensors registered in the constructor.
  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axis() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  // Validates the operands and resizes the output tensor.
  void configure() override;
  // Runs the computation on the configured tensors.
  void execute() const override;
};

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
new file mode 100644
index 000000000..474f4b321
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -0,0 +1,122 @@
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/ArgMax.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

namespace luci_interpreter
{
namespace kernels
{
namespace
{

using namespace testing;

// Builds an ArgMax kernel over the given input and an S32 axis ("dimension")
// tensor, runs configure() and execute(), and compares the produced data and
// shape with the expected ones.
//   T1 - C++ element type of the input tensor.
//   T2 - C++ element type of the output tensor; also used as params.output_type.
template <typename T1, typename T2>
void Check(std::initializer_list<int32_t> input_shape,
           std::initializer_list<int32_t> dimension_shape,
           std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
           std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  constexpr DataType element_type = getElementType<T1>();
  Tensor input_tensor =
    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
  Tensor dimension_tensor =
    makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(getElementType<T2>());

  ArgMaxParams params{};
  params.output_type = getElementType<T2>();
  ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
  kernel.configure();
  // The output is allocated only after configure() has resized it.
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
}

// Typed fixture; TypeParam is the input element type.
template <typename T> class ArgMaxTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t>;
TYPED_TEST_SUITE(ArgMaxTest, DataTypes);

// Single row of four values reduced over the last axis, for both int32 and
// int64 outputs: the maximum (9) sits at index 1.
TYPED_TEST(ArgMaxTest, Simple)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
}

// Two rows of four values reduced over the last axis: the maxima are at
// indices 3 and 1, checked for both int32 and int64 outputs.
TYPED_TEST(ArgMaxTest, MultiDimensions)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                              1, 2, 7, 8, //
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                              1, 2, 7, 8, //
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
}

// Negative test: a U8 output type is expected to make execute() throw.
TEST(ArgMaxTest, UnsupportedType_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
                                                           {
                                                             1, 2, 7, 8, //
                                                             1, 9, 7, 3, //
                                                           },
                                                           memory_manager.get());
  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8);

  ArgMaxParams params{};
  params.output_type = DataType::U8;
  ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644
index 000000000..d3bade9e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -0,0 +1,194 @@
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/AveragePool2D.h" + +#include "kernels/Utils.h" + +#include "PALAveragePool2d.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad, + const Pool2DParams ¶ms) + : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params) +{ +} + +void AveragePool2D::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input Tensor and Output Tensor Type must be same"); + } + if (input()->shape().num_dims() != 4) + { + throw std::runtime_error("Input Tensor Shape must be 4-D"); + } + const Shape &input_shape = input()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + + const int32_t output_height = + computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height); + const int32_t output_width = + computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); + + _padding_height = + computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); + _padding_width = + computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + 
LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + else if (input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + output()->resize({batches, output_height, output_width, depth}); + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), + getTensorShape(input()), getTensorShape(output())); +} + +void AveragePool2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalSInt16(); + break; + case DataType::S8: + evalSInt8(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void AveragePool2D::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void AveragePool2D::evalQuantized() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + 
calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_ops::AveragePool(params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); +} + +void AveragePool2D::evalSInt8() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::AveragePool<int8_t>( + params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void AveragePool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + 
params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::AveragePool( + params, getTensorShape(input()), getTensorData<int16_t>(input()), // + getTensorShape(output()), getTensorData<int16_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h new file mode 100644 index 000000000..2c8fe16e7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
#define LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H

#include "core/Kernel.h"
#include "core/KernelParams.h"

namespace luci_interpreter
{
namespace kernels
{

// 2-D average pooling kernel parameterized by Pool2DParams.
class AveragePool2D : public KernelWithParams<Pool2DParams>
{
public:
  // Takes the input tensor, the output tensor and a scratchpad tensor.
  AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
                const Pool2DParams &params);

  const Tensor *input() const { return _inputs[0]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // One evaluation routine per supported element type.
  void evalFloat() const;
  void evalQuantized() const;
  void evalSInt16() const;
  void evalSInt8() const;

private:
  // Padding along height and width; zero until assigned.
  int32_t _padding_height{};
  int32_t _padding_width{};
};

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
new file mode 100644
index 000000000..478bfa68e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -0,0 +1,283 @@
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/AveragePool2D.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

namespace luci_interpreter
{
namespace kernels
{
namespace
{

using namespace testing;

// Fixture that gives every test a fresh TestMemoryManager.
class AveragePool2DTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};

// FLOAT32, 1x3x5x1 input, VALID padding, 2x3 filter, stride (1, 2), RELU6.
TEST_F(AveragePool2DTest, Float)
{
  Shape input_shape{1, 3, 5, 1};
  std::vector<float> input_data{
    -4, -3, -2, -1, 0,  //
    1,  2,  3,  4,  5,  //
    6,  7,  8,  9,  10, //
  };
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 3;
  params.stride_height = 1;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  kernel.configure();
  _memory_manager->allocate_memory(scratchpad);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{
    0,   1.5, //
    4.5, 6,   //
  };
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
}

// U8 with negative values in the input; 2x2 filter, stride 2, RELU6.
TEST_F(AveragePool2DTest, Uint8_0)
{
  std::vector<float> input_data{
    0,  -6, 12, 4, //
    -3, -2, 10, 7, //
  };
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
  Tensor scratchpad(DataType::U8, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 2;
  params.stride_height = 2;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  kernel.configure();
  _memory_manager->allocate_memory(scratchpad);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}

// U8 with non-negative input; same pooling configuration as Uint8_0.
TEST_F(AveragePool2DTest, Uint8_1)
{
  std::vector<float> input_data{
    0, 6, 12, 4, //
    3, 2, 10, 7, //
  };

  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
  Tensor scratchpad(DataType::U8, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 2;
  params.stride_height = 2;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}

// S16 with scale 0.5 and zero point 0; same data and configuration as Float.
TEST_F(AveragePool2DTest, SInt16)
{
  Shape input_shape{1, 3, 5, 1};
  std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
  std::vector<float> input_data{
    -4, -3, -2, -1, 0,  //
    1,  2,  3,  4,  5,  //
    6,  7,  8,  9,  10, //
  };
  std::vector<float> ref_output_data{
    0,   1.5, //
    4.5, 6,   //
  };
  Tensor input_tensor =
    makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
  Tensor scratchpad(DataType::S16, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 3;
  params.stride_height = 1;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  kernel.configure();
  _memory_manager->allocate_memory(scratchpad);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}

// S8, 1x4x5x1 input, 2x3 filter, stride (2, 2), RELU6.
TEST_F(AveragePool2DTest, SInt8)
{
  Shape input_shape{1, 4, 5, 1};
  std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
  std::vector<float> input_data{-7, -3, 0,  2, -5, 12, -15, 3,  10, 5,
                                7,  -6, -1, 9, -2, 0,  -5,  11, -1, -7};
  std::vector<float> ref_output_data{
    0, 2.5, //
    1, 1.5, //
  };

  std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
  Tensor input_tensor = makeInputTensor<DataType::S8>(
    input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
  Tensor scratchpad(DataType::S8, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 3;
  params.stride_height = 2;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  kernel.configure();
  _memory_manager->allocate_memory(scratchpad);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}

// Negative test: a 3-D input must make configure() throw.
TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
{
  Shape input_shape{1, 3, 5};
  std::vector<float> input_data{
    -4, -3, -2, -1, 0,  //
    1,  2,  3,  4,  5,  //
    6,  7,  8,  9,  10, //
  };
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 3;
  params.stride_height = 1;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Negative test: FLOAT32 input with U8 output must make configure() throw.
TEST_F(AveragePool2DTest, In_Out_Type_NEG)
{
  Shape input_shape{1, 3, 5, 1};
  std::vector<float> input_data{
    -4, -3, -2, -1, 0,  //
    1,  2,  3,  4,  5,  //
    6,  7,  8,  9,  10, //
  };
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8);
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 3;
  params.stride_height = 1;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Negative test: input and output built from different quantization ranges
// must make configure() throw.
TEST_F(AveragePool2DTest, Quant_Param_NEG)
{
  std::vector<float> input_data{
    0,  -6, 12, 4, //
    -3, -2, 10, 7, //
  };

  std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
  std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
  Tensor scratchpad(DataType::U8, Shape({}), {}, "");

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 2;
  params.stride_height = 2;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
  EXPECT_ANY_THROW(kernel.configure());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include "kernels/BatchMatMul.h" +#include "kernels/Utils.h" + +#include "PALBatchMatMul.h" + +#include <tensorflow/lite/kernels/internal/reference/transpose.h> + +#include <stdexcept> + +namespace +{ + +tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape) +{ + tflite::RuntimeShape swapped_shape(shape); + const int32_t dims = shape.DimensionsCount(); + swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); + swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); + return swapped_shape; +} + +} // namespace + +namespace luci_interpreter +{ +namespace kernels +{ + +BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, + Tensor *y_tmp, const BatchMatMulParams ¶ms) + : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params) +{ +} + +void BatchMatMul::configure() +{ + auto lhs = x(); + auto rhs = y(); + auto adj_x = params().adj_x; + auto adj_y = params().adj_y; + + // TODO Support non-float types + if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32) + throw std::runtime_error("Unsupported type."); + + LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type()); + + auto lhs_rank = lhs->shape().num_dims(); + auto rhs_rank = rhs->shape().num_dims(); + LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4); + LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4); + + auto lhs_scratchpad = temp_lhs(); + auto rhs_scratchpad = temp_rhs(); + luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs), + getTensorShape(rhs)); + + auto output_rank = std::max(lhs_rank, rhs_rank); + + auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs)); + auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs)); + + // Ensure any batch dimensions obey broacasting rules. 
+ for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + if (lhs_dim != rhs_dim) + { + if (lhs_dim != 1) + { + LUCI_INTERPRETER_CHECK(rhs_dim == 1); + } + } + } + + // Ensure other dimensions work for matrix multiplication. + int accum_dim_lhs = + adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1); + int accum_dim_rhs = + adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2); + LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs); + + Shape output_shape(output_rank); + // Fill in any broadcast dimensions. + for (int i = 0; i < output_rank - 2; ++i) + { + const int lhs_dim = extended_lhs_shape.Dims(i); + const int rhs_dim = extended_rhs_shape.Dims(i); + int broadcast_dim = lhs_dim; + if ((lhs_dim != rhs_dim) && (lhs_dim == 1)) + { + broadcast_dim = rhs_dim; + } + output_shape.dim(i) = broadcast_dim; + } + // Fill in the matmul dimensions. + int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2; + int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1; + + output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index); + output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index); + + output()->resize(output_shape); +} + +void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out) +{ + tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in)); + tflite::RuntimeShape shape(getTensorShape(tensor_in)); + tflite::TransposeParams params; + int rank = shape.DimensionsCount(); + params.perm_count = rank; + for (int i = 0; i < rank - 2; ++i) + { + params.perm[i] = i; + } + // Transpose the last two dimensions. 
+ params.perm[rank - 2] = rank - 1; + params.perm[rank - 1] = rank - 2; + transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2)); + transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1)); + switch (tensor_in->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in), + transposed_shape, getTensorData<float>(tensor_out)); + break; + default: + throw std::runtime_error("Only suppport fp32 BatchMatMul for now."); + } +} + +void BatchMatMul::execute() const +{ + auto lhs = x(); + auto rhs = y(); + + bool adj_x = params().adj_x; + bool adj_y = params().adj_y; + + auto orig_lhs_shape = getTensorShape(lhs); + auto orig_rhs_shape = getTensorShape(rhs); + + auto rhs_tensor = adj_y ? rhs : temp_rhs(); + auto lhs_tensor = adj_x ? temp_lhs() : lhs; + if (not adj_y) + { + TransposeRowsColumns(rhs, temp_rhs()); + } + if (adj_x) + { + TransposeRowsColumns(lhs, temp_lhs()); + } + tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); + tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); + + switch (x()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape, + getTensorData<float>(lhs_tensor), getTensorShape(output()), + getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h new file mode 100644 index 000000000..744f49795 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H

#include "core/Kernel.h"
#include "core/KernelParams.h"

namespace luci_interpreter
{
namespace kernels
{

// Batched matrix multiplication kernel parameterized by BatchMatMulParams.
class BatchMatMul : public KernelWithParams<BatchMatMulParams>
{
public:
  // x and y are the two operands; x_tmp and y_tmp are temporary tensors
  // exposed through temp_lhs() / temp_rhs() below.
  BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
              const BatchMatMulParams &params);

  const Tensor *x() const { return _inputs[0]; }
  const Tensor *y() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Temporary tensors, stored as kernel outputs 1 and 2.
  Tensor *temp_lhs() const { return _outputs[1]; }
  Tensor *temp_rhs() const { return _outputs[2]; }
};

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/BatchMatMul.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

namespace luci_interpreter
{
namespace kernels
{
namespace
{

using namespace testing;

// Fixture that gives every test a fresh TestMemoryManager.
class BatchMatMulTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};

// {1, 2, 3} x {1, 3, 4} with no adjoint flags set; expects a {1, 2, 4} result.
TEST_F(BatchMatMulTest, Float)
{
  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
  std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
  Tensor lhs_tensor =
    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
  Tensor rhs_tensor =
    makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");

  BatchMatMulParams params;
  params.adj_x = false;
  params.adj_y = false;

  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
  kernel.configure();
  // Scratch and output tensors are allocated after configure().
  _memory_manager->allocate_memory(lhs_scratch);
  _memory_manager->allocate_memory(rhs_scratch);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorData<float>(output_tensor),
              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
}

+// Same product as the Float test, but the right operand is supplied transposed ({1, 4, 3})
+// and adj_y is set.
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+  std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = true;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// Same product again, with the left operand supplied transposed ({1, 3, 2}) and adj_x set.
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+  std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+  std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = true;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// Both operands carry a batch of two; the expected output has shape {2, 2, 4}.
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+                                 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+                              767., 800., 833., 866.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+// Left batch is 2, right batch is 1; the expected values reuse the single right-hand matrix
+// for both left-hand batches and the expected shape is {2, 1, 4}.
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+                                 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+// Negative: the contracted dimensions differ (2 columns on the left, 3 rows on the right).
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative: batch sizes 2 and 3 are different and neither of them is 1.
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+                                                         _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative: the left operand has rank 1.
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+  Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+                                                         _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative: the left operand has rank 5.
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+                                                         _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative: shapes are compatible but the operands have different element types (U8, FLOAT32).
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644
index 000000000..bd315ff7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+// Accepted input ranks: batch + 1 or 2 spatial dimensions + depth.
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+// Inputs are registered in the order {input, block_shape, crops}; the only output is `output`.
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+                               Tensor *output)
+  : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+/**
+ * @brief Validates the operands and resizes the output tensor.
+ *
+ * Every failed LUCI_INTERPRETER_CHECK aborts configuration (the kernel tests expect
+ * configure() to throw on invalid operands).
+ *
+ * Output shape: dim(0) is the input batch divided by every block size, each spatial
+ * dimension is input_dim * block_size - crop_begin - crop_end, and the last dimension is
+ * copied from the input.
+ */
+void BatchToSpaceND::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // Both auxiliary tensors are read as int32_t below, so reject any other element type.
+  LUCI_INTERPRETER_CHECK(block_shape()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(crops()->element_type() == DataType::S32);
+
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *crops_data = crops()->data<int32_t>();
+
+  int spatial_dims_num = input()->shape().num_dims() - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+  for (int i = 0; i < spatial_dims_num * 2; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+  }
+
+  Shape output_shape = Shape(input()->shape().num_dims());
+  int output_batch_size = input()->shape().dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    // A zero block size would make the modulo and division below undefined, and a negative
+    // one has no meaning, so both are rejected before they are used.
+    LUCI_INTERPRETER_CHECK(block_shape_data[i] > 0);
+    LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+    output_batch_size = output_batch_size / block_shape_data[i];
+
+    const auto cropped_dim =
+      input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+    // Crops larger than the expanded extent would produce a negative dimension.
+    LUCI_INTERPRETER_CHECK(cropped_dim >= 0);
+    output_shape.dim(i + 1) = cropped_dim;
+  }
+
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(input()->shape().num_dims() - 1) =
+    input()->shape().dim(input()->shape().num_dims() - 1);
+  output()->resize(output_shape);
+}
+
+/**
+ * @brief Runs the PAL BatchToSpaceND implementation for FLOAT32 or U8 input.
+ * @throws std::runtime_error for any other input element type.
+ */
+void BatchToSpaceND::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::BatchToSpaceND(
+        getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+        getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+        getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::BatchToSpaceND(
+        getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+        getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+        getTensorData<int32_t>(crops()), getTensorShape(output()),
+        getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
new file mode 100644
index 000000000..57703ea5d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for BatchToSpaceND. The accessors map input slots 0, 1, 2 to
+// input(), block_shape(), crops() and output slot 0 to output().
+class BatchToSpaceND : public Kernel
+{
+public:
+  BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+                 Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *block_shape() const { return _inputs[1]; }
+  const Tensor *crops() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
new file mode 100644
index 000000000..52647a763
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Shared driver: builds the input, block_shape (S32) and crops (S32) tensors, runs
+// configure() then execute(), and compares the output data and shape with the expected ones.
+// The output tensor gets its memory only after configure().
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+           std::initializer_list<int32_t> block_shape_shape,
+           std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+           std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor crops_tensor =
+    makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Typed fixture: each TYPED_TEST below is instantiated for float and uint8_t.
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
+
+// Four 2x2x1 batches with a 2x2 block and zero crops become one 4x4x1 image.
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+  Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+                   /*output_shape=*/{1, 4, 4, 1},
+                   /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                   /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+                   /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+// Negative: a batch of 3 is not divisible by the block size 2, so configure() must throw.
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative: a crop value of -1 must be rejected by configure().
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
new file mode 100644
index 000000000..2d2842a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+// assert() is used below; include it directly instead of relying on the tflite headers.
+#include <cassert>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+/**
+ * @brief Applies a binary functor element-wise, broadcasting the inputs when shapes differ.
+ *
+ * When both input shapes compare equal the buffers are walked linearly. Otherwise the shapes
+ * are extended to N dimensions and every output element is computed through broadcast index
+ * descriptors.
+ *
+ * @tparam T   element type of both inputs and of the output
+ * @tparam Op  callable invoked as op(lhs_element, rhs_element)
+ * @tparam N   maximum supported rank; only checked with assert() on the broadcast path
+ */
+template <typename T, typename Op, int N = 5>
+void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape,
+                           const T *input1_data,
+                           const tflite::RuntimeShape &unextended_input2_shape,
+                           const T *input2_data,
+                           const tflite::RuntimeShape &unextended_output_shape, T *output_data,
+                           Op op)
+{
+  if (unextended_input1_shape == unextended_input2_shape)
+  {
+    // Fast path: identical shapes, so element i of each input maps to element i of the output.
+    const int flat_size = tflite::MatchingElementsSize(
+      unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+    for (int i = 0; i < flat_size; ++i)
+    {
+      output_data[i] = op(input1_data[i], input2_data[i]);
+    }
+  }
+  else
+  {
+    assert(unextended_input1_shape.DimensionsCount() <= N);
+    assert(unextended_input2_shape.DimensionsCount() <= N);
+    assert(unextended_output_shape.DimensionsCount() <= N);
+
+    tflite::NdArrayDesc<N> desc1{};
+    tflite::NdArrayDesc<N> desc2{};
+    tflite::NdArrayDesc<N> output_desc{};
+    tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
+                                                &desc1, &desc2);
+    tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
+                           &output_desc);
+
+    // Called once per output coordinate by NDOpsHelper.
+    auto fn = [&](int indexes[N]) {
+      output_data[SubscriptToIndex(output_desc, indexes)] =
+        op(input1_data[SubscriptToIndex(desc1, indexes)],
+           input2_data[SubscriptToIndex(desc2, indexes)]);
+    };
+    tflite::NDOpsHelper<N>(output_desc, fn);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644
index 000000000..9f4ba0e0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SOURCES
+    BinaryOpCommon.h
+    Utils.h
+    Utils.cpp
+    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+    ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+    ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+macro(REGISTER_KERNEL NODE)
+  list(APPEND SOURCES "${NODE}.h")
+  list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+macro(REGISTER_KERNEL NODE)
+  list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+# Shared test helpers are added to the test sources.
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+# Single test executable for all kernels, linked against the kernels library.
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644
index 000000000..39ee725dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+// Standard headers for std::transform, assert, uint32_t and std::runtime_error, which this
+// file uses directly.
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <stdexcept>
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+// Converts elements_count values from in_data to out_data with static_cast<OutT>.
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+  std::transform(in_data, in_data + elements_count, out_data,
+                 [](InT a) { return static_cast<OutT>(a); });
+}
+
+// Picks the output C++ type from out_tensor's element type and converts as many elements as
+// the output shape holds. Throws std::runtime_error for an unsupported output type.
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+  auto const out_type = out_tensor->element_type();
+  auto const elements_count = out_tensor->shape().num_elements();
+
+  switch (out_type)
+  {
+    case loco::DataType::U8:
+      cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::U16:
+      cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::U32:
+      cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::U64:
+      cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::S8:
+      cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::S16:
+      cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::S32:
+      cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::S64:
+      cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+      break;
+    case loco::DataType::FLOAT32:
+      cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+      break;
+    case loco::DataType::BOOL:
+      cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+      break;
+    default:
+      throw std::runtime_error("Unsupported output type.");
+  }
+}
+
+// Picks the input C++ type from in_tensor's element type, then dispatches on the output type.
+// Throws std::runtime_error for an unsupported input type.
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+  auto in_type = in_tensor->element_type();
+
+  switch (in_type)
+  {
+    case loco::DataType::U8:
+      cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::U16:
+      cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::U32:
+      cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::U64:
+      cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::S8:
+      cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::S16:
+      cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::S32:
+      cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::S64:
+      cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::FLOAT32:
+      cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+      break;
+    case loco::DataType::BOOL:
+      cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+      break;
+    default:
+      throw std::runtime_error("Unsupported input type.");
+  }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Rejects Unknown element types on either side and gives the output the input's shape.
+void Cast::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
+  LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
+
+  const Shape &shape = input()->shape();
+  output()->resize(shape);
+}
+
+// Converts every input element to the output element type.
+void Cast::execute() const
+{
+  assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+  cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
new file mode
100644
index 000000000..f0bd02037
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CAST_H
+#define LUCI_INTERPRETER_KERNELS_CAST_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel declaration for Cast: one input (slot 0) and one output (slot 0).
+class Cast : public Kernel
+{
+public:
+  Cast(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CAST_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
new file mode 100644
index 000000000..4713ad34c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Shared driver: casts a T1 tensor to a T2 tensor and compares the output data and shape
+// with the expected ones. The output tensor gets its memory only after configure().
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
+           std::initializer_list<T2> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType input_type = getElementType<T1>();
+  constexpr DataType output_type = getElementType<T2>();
+
+  Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(output_type);
+
+  Cast kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+// Variant of Check for BOOL input: the bool values are first copied into a vector of
+// DataTypeImpl<BOOL>::Type before the input tensor is built.
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+                 std::initializer_list<T> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType input_type = loco::DataType::BOOL;
+  constexpr DataType output_type = getElementType<T>();
+  std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+  for (auto elem : input_data)
+  {
+    input_data_converted.push_back(elem);
+  }
+
+  Tensor input_tensor =
+    makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(output_type);
+
+  Cast kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+// Typed fixture: each TYPED_TEST below is instantiated for all eight integer types.
+template <typename T> class CastTest : public ::testing::Test
+{
+};
+
+using IntDataTypes =
+  ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
+
+TYPED_TEST(CastTest, FloatToInt)
+{
+  Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
+                          /*input_data=*/
+                          {
+                            1.0f, 9.0f, 7.0f, 3.0f, //
+                          },
+                          /*output_data=*/
+                          {
+                            1, 9, 7, 3, //
+                          });
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToFloat)
+{
+  Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+                          /*input_data=*/
+                          {
+                            1, 9, 7, 3, //
+                          },
+                          /*output_data=*/
+                          {
+                            1.0f, 9.0f, 7.0f, 3.0f, //
+                          });
+  SUCCEED();
+}
+
+// Integer-to-integer helper used by IntToInt for every destination type.
+template <typename T1, typename T2> void check_int()
+{
+  Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+                /*input_data=*/
+                {
+                  1, 9, 7, 3, //
+                },
+                /*output_data=*/
+                {
+                  1, 9, 7, 3, //
+                });
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToInt)
+{
+  check_int<TypeParam, uint8_t>();
+  check_int<TypeParam, uint16_t>();
+  check_int<TypeParam, uint32_t>();
+  check_int<TypeParam, uint64_t>();
+  check_int<TypeParam, int8_t>();
+  check_int<TypeParam, int16_t>();
+  check_int<TypeParam, int32_t>();
+  check_int<TypeParam, int64_t>();
+  SUCCEED();
+}
+
+// Non-zero integers are expected to become true, zero to become false.
+TYPED_TEST(CastTest, IntToBool)
+{
+  Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+                         /*input_data=*/
+                         {
+                           1, 0, 7, 0, //
+                         },
+                         /*output_data=*/
+                         {
+                           true, false, true, false, //
+                         });
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, BoolToInt)
+{
+  CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+                         /*input_data=*/
+                         {
+                           true, false, false, true, //
+                         },
+                         /*output_data=*/
+                         {
+                           1, 0, 0, 1, //
+                         });
+  SUCCEED();
+}
+
+TEST(CastTest, FloatToBool)
+{
+  Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+                     /*input_data=*/
+                     {
+                       1.0f, 0.0f, 7.0f, 0.0f, //
+                     },
+                     /*output_data=*/
+                     {
+                       true, false, true, false, //
+                     });
+  SUCCEED();
+}
+
+TEST(CastTest, BoolToFloat)
+{
+  CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+                     /*input_data=*/
+                     {
+                       true, false, false, true, //
+                     },
+                     /*output_data=*/
+                     {
+                       1.0f, 0.0f, 0.0f, 1.0f, //
+                     });
+  SUCCEED();
+}
+
+TEST(CastTest, FloatToFloat)
+{
+  Check<float, float>(/*shape=*/{1, 1, 1, 4},
+                      /*input_data=*/
+                      {
+                        1.0f, 0.0f, 7.0f, 0.0f, //
+                      },
+                      /*output_data=*/
+                      {
+                        1.0f, 0.0f, 7.0f, 0.0f, //
+                      });
+  SUCCEED();
+}
+
+TEST(CastTest, BoolToBool)
+{
+  CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+                    /*input_data=*/
+                    {
+                      true, true, false, false, //
+                    },
+                    /*output_data=*/
+                    {
+                      true, true, false, false, //
+                    });
+  SUCCEED();
+}
+
+// Negative: an output tensor of type Unknown must make configure() throw.
+TEST(CastTest, UnsupportedType_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+                                                           {
+                                                             1, 2, 7, 8, //
+                                                             1, 9, 7, 3, //
+                                                           },
+                                                           memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::Unknown);
+
+  Cast kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+  SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644
index 000000000..46ee5941e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Concatenation.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/concatenation.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output, + const ConcatenationParams ¶ms) + : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params) +{ +} + +void Concatenation::configure() +{ + const int num_inputs = _inputs.size(); + LUCI_INTERPRETER_CHECK(num_inputs > 0); + const Tensor *t0 = _inputs[0]; + + // TODO: Support concat with fused activation function + LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE); + + int axis = _params.axis; + if (axis < 0) + axis += t0->shape().num_dims(); + LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims()); + + int32_t sum_axis = t0->shape().dim(axis); + for (int i = 1; i < num_inputs; ++i) + { + const Tensor *tensor = _inputs[i]; + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); + for (int d = 0; d < t0->shape().num_dims(); ++d) + { + if (d == axis) + { + sum_axis += tensor->shape().dim(axis); + } + else + { + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); + } + } + } + + Shape output_shape = t0->shape(); + output_shape.dim(axis) = sum_axis; + 
+ // If input tensors are INT8 type then quantization parameters of all input tensors and the output + // should be the same + for (auto current_tensor : _inputs) + { + if (current_tensor->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() == + output()->quantized_dimension()); + + LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() == + current_tensor->scales().size()); + LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points()); + LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales()); + } + } + output()->resize(output_shape); +} + +void Concatenation::execute() const +{ + switch (_inputs[0]->element_type()) + { + case DataType::FLOAT32: + evalGeneric<float>(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S8: + evalGeneric<int8_t>(); + break; + case DataType::S32: + evalGeneric<int32_t>(); + break; + case DataType::S64: + evalGeneric<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Concatenation::evalGeneric() const +{ + int axis = _params.axis; + if (axis < 0) + axis += output()->shape().num_dims(); + + VectorOfTensors<T, true> inputs(_inputs); + tflite::ConcatenationParams params{}; + params.axis = axis; + params.inputs_count = _inputs.size(); + tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(), + getTensorShape(output()), getTensorData<T>(output())); +} + +void Concatenation::evalQuantized() const +{ + int axis = _params.axis; + if (axis < 0) + axis += output()->shape().num_dims(); + + VectorOfQuantizedTensors<true> inputs(_inputs); + tflite::ConcatenationParams params{}; + params.axis = axis; + params.input_zeropoint = inputs.zero_point(); + params.input_scale = inputs.scale(); + params.inputs_count = _inputs.size(); + params.output_zeropoint = output()->zero_point(); + params.output_scale = output()->scale(); + + 
tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(), + getTensorShape(output()), + getTensorData<uint8_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h new file mode 100644 index 000000000..b48c8ed1e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H +#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Concatenation : public KernelWithParams<ConcatenationParams> +{ +public: + Concatenation(std::vector<const Tensor *> inputs, Tensor *output, + const ConcatenationParams ¶ms); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void evalGeneric() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp new file mode 100644 index 000000000..f893b38fd --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Concatenation.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ConcatenationTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ConcatenationTest, Float) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + // Try different 'axis' and expect different results. + { + params.axis = 0; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + for (auto t : kernel.getOutputTensors()) + { + _memory_manager->allocate_memory(*t); + } + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); + } + { + params.axis = -2; // Same as '0'. 
+ params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); + } + { + params.axis = 1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); + } + { + params.axis = -1; // Same as '1'. + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); + } +} + +TEST_F(ConcatenationTest, Input_Number_Check_NEG) +{ + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Invalid_Axis_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -3; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, 
&input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + 
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG) +{ + std::vector<uint8_t> input1_data{1, 2, 3, 4}; + std::vector<int8_t> input2_data{5, 6, 7, 8}; + Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + int quantized_dimension = 3; + std::vector<float> scales{0.1, 0.2, 0.3}; + std::vector<int32_t> zero_points{1, -1, 1}; + + Tensor input1_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0)); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4}; + std::vector<float> input2_data{5, 6, 7, 8}; + float scale = 0.1; + int32_t zero_point_1 = 1; + int32_t zero_point_2 = -1; + + Tensor input1_tensor = + makeInputTensor<DataType::S8>({2, 
2}, scale, zero_point_1, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1); + ConcatenationParams params{}; + + params.axis = -1; + params.activation = luci::FusedActFunc::NONE; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +// TODO: Remove this test when concat w/ fused_activation is supported +TEST_F(ConcatenationTest, With_Fused_Activation_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = 1; + params.activation = luci::FusedActFunc::RELU; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp new file mode 100644 index 000000000..234f95425 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Conv2D.h" + +#include "kernels/Utils.h" + +#include "PALConv2d.h" + +#include <stdexcept> +#include <thread> + +namespace luci_interpreter +{ +namespace kernels +{ + +Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, + Tensor *scratchpad, const Conv2DParams ¶ms) + : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params) +{ +} + +void Conv2D::configure() +{ + // TensorFlow Lite (as of v2.2.0) supports the following combinations of types: + // | input filter bias output | + // ----+---------------------------+ + // (1) | float float float float | + // (2) | float int8 float float | hybrid + // (3) | uint8 uint8 int32 uint8 | quantized + // (4) | int8 int8 int32 int8 | quantized per channel + // + // We only support (1), (3) and (4) for now, and additionally the following: + // | input filter bias output | + // ----+---------------------------+ + // (5) | int16 int16 int64 int16 | + // + if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + } + else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } + else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == 
DataType::S32); + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + } + else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); + } + else + { + throw std::runtime_error("Unsupported type."); + } + LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3)); + + LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 && + bias()->shape().dim(0) == output_depth)); + + const int32_t output_height = + computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, + _params.dilation_height_factor); + const int32_t output_width = + computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, + _params.dilation_width_factor); + + _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor, + input_height, filter_height, output_height); + _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width, + filter_width, output_width); + + output()->resize({batches, output_height, output_width, output_depth}); + + // Allocate tensor for scratchpad, if 
needed. + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params, + getTensorShape(input()), getTensorShape(filter()), + getTensorShape(output())); + + switch (_params.activation) + { + case Activation::NONE: + case Activation::RELU: + case Activation::RELU6: + case Activation::RELU_N1_TO_1: + break; + default: + throw std::runtime_error("Unsupported fused activation"); + } +} + +void Conv2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + if (filter()->element_type() == DataType::FLOAT32) + { + evalFloat(); + break; + } + throw std::runtime_error("Unsupported type."); + case DataType::U8: + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } + break; + case DataType::S8: + evalQuantizedS8PerChannel(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Conv2D::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = 
_params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + float *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<float>(); + + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(filter()), getTensorData<float>(filter()), + getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output()), + getTensorShape(scratchpad), scratchpad_data); +} + +void Conv2D::evalQuantized() const +{ + const auto input_scale = static_cast<double>(input()->scale()); + const auto filter_scale = static_cast<double>(filter()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_multiplier = input_scale * filter_scale / output_scale; + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects input and filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = -filter()->zero_point(); // Note the '-'. 
+ params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(filter()), getTensorData<uint8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output()), + getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad)); +} + +void Conv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scale = + 
getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector<ChannelQuantMultipliers> multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int32_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast<int32_t>(input_val - input()->zero_point()) * + static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +void Conv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t 
activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = 0; // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector<int32_t> shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector<int32_t> multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::ConvPerChannel( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void 
Conv2D::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + const auto *filter_data = getTensorData<int16_t>(filter()); + const auto *bias_data = getTensorData<int64_t>(bias()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scale = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + const std::vector<ChannelQuantMultipliers> multipliers_raw = + quantizeMultipliers(effective_output_scale); + BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - 
_padding_width; + int64_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width)) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); + } + } + } + } + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, multipliers[out_c].multiplier, multipliers[out_c].shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h new file mode 100644 index 000000000..330bf3a2a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H +#define LUCI_INTERPRETER_KERNELS_CONV2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class Conv2D : public KernelWithParams<Conv2DParams> +{ +public: + Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, + Tensor *scratchpad, const Conv2DParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *filter() const { return _inputs[1]; } + const Tensor *bias() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; + void evalQuantizedS16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp new file mode 100644 index 000000000..0fe6ef795 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -0,0 +1,707 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Conv2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class Conv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(Conv2DTest, Float) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + 
params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 11, 16, 7, 20, // row = 0 + 0, 40, 0, 44, // row = 1 + }; + std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, FloatPointwise) +{ + Shape input_shape{1, 2, 2, 2}; + Shape filter_shape{2, 1, 1, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, // row = 0, col = 0 + 3, 4, // row = 0, col = 1 + 5, 6, // row = 1, col = 0 + 7, 8, // row = 1, col = 1 + }; + std::vector<float> filter_data{ + -1, 2, // out = 0 + -3, 4, // out = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + 
_memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 4, 7, 6, 9, // row = 0 + 8, 11, 10, 13, // row = 1 + }; + std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, FloatCheck) +{ + Shape input_shape{2, 2, 4, 1}; + Shape filter_shape{3, 2, 2, 1}; + Shape bias_shape{3}; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 
4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, Uint8) +{ + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, 
// first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, Uint8_CWQ) +{ + const int output_channels = 3; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, + 0, filter_data, 
_memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, SInt8_CWQ) +{ + const int output_channels = 3; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0)); + 
filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::S8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, SInt16) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; + + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + std::vector<float> ref_output_data{ + 11, 16, 7, 20, // row = 0 + 0, 40, 0, 44, // row = 1 + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(Conv2DTest, SInt16_CWQ_weights) +{ + Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C + Shape filter_shape{3, 1, 1, 2}; // Out channels x H x 
W x In Channels + Shape bias_shape{3}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 3}; + + std::vector<float> input_data{ + 1, 2, // row = 0, col 0 + 3, 4, // row = 0, col 1 + 5, 6, // row = 1, col 0 + 7, 8, // row = 1, col 1 + }; + std::vector<float> filter_data{ + 4, -3, // out = 0 + 1, -3, // out = 1 + 5, -3, // out = 2 + }; + std::vector<float> bias_data{1, 10, 5}; + std::vector<float> ref_output_data{ + 0, 5, 4, // row 0, col 0 + 1, 1, 8, // row 0, col 1 + 3, 0, 12, // row 1, col 0 + 5, 0, 16, // row 1, col 1 + }; + + float input_scale = 0.25f; + float output_scale = 0.05f; + std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f}; + std::vector<float> bias_scales; + for (int i = 0; i < filter_scales.size(); ++i) + bias_scales.push_back(filter_scales[i] * input_scale); + std::vector<int32_t> zerop = {0, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + 
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<int32_t> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_Bias_Type_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<uint8_t> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, 
_memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_Bias_Data_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{3}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2, 3}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, 
&bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_Input_Shape_NEG) +{ + Shape input_shape{1, 4, 6, 1}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG) +{ + Shape input_shape{1, 4, 3, 2}; + Shape filter_shape{2, 2, 2, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, 3, 4, 5, 6, // row = 0 + 7, 8, 9, 10, 11, 12, // row = 1 + 13, 14, 15, 16, 17, 18, // row = 2 + 19, 20, 21, 22, 23, 24, // row = 3 + }; + std::vector<float> filter_data{ + 1, 2, -3, -4, // out = 0, row = 0 + -5, 6, -7, 8, // out = 1, row = 0 + 4, -2, 3, -1, // out = 0, row = 1 + -8, -6, 7, 5, // out = 1, row = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor 
input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::TANH; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp new file mode 100644 index 000000000..3a9acd1d4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DepthToSpace.h" +#include "Utils.h" +#include "PALDepthToSpace.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams ¶ms) + : KernelWithParams<DepthToSpaceParams>({input}, {output}, params) +{ +} + +void DepthToSpace::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8) + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()) + const int block_size = params().block_size; + const int32_t input_height = input()->shape().dim(1); + const int32_t input_width = input()->shape().dim(2); + const int32_t input_channels = input()->shape().dim(3); + int32_t output_height = input_height * block_size; + int32_t output_width = input_width * block_size; + int32_t output_channels = input_channels / block_size / block_size; + + LUCI_INTERPRETER_CHECK(input_height == output_height / block_size); + LUCI_INTERPRETER_CHECK(input_width == output_width / block_size); + LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size); + + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = output_height; + output_shape.dim(2) = output_width; + output_shape.dim(3) = output_channels; + + output()->resize(output_shape); +} + +void DepthToSpace::execute() const +{ + tflite::DepthToSpaceParams op_params; + op_params.block_size = params().block_size; + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + 
getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported Type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h new file mode 100644 index 000000000..63ce37610 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class DepthToSpace : public KernelWithParams<DepthToSpaceParams> +{ +public: + DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp new file mode 100644 index 000000000..88e6e07f1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/DepthToSpace.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> class DepthToSpaceTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes); + +TYPED_TEST(DepthToSpaceTest, SimpleCase) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8}; + std::vector<int32_t> output_shape{1, 2, 4, 1}; + + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), + ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(DepthToSpaceTest, InvalidInputShape_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 2, 4}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthToSpaceTest, InOutTypeMismatch_NEG) 
+{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthToSpaceTest, InvalidBlockSize_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthToSpaceParams params{}; + params.block_size = 3; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp new file mode 100644 index 000000000..c554c309d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/DepthwiseConv2D.h" + +#include "kernels/Utils.h" + +#include "PALDepthwiseConv2d.h" + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, + Tensor *output, Tensor *scratchpad, + const DepthwiseConv2DParams &params) + : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params) +{ +} + +void DepthwiseConv2D::configure() +{ + // TensorFlow Lite (as of v2.2.0) supports the following combinations of types: + // | input filter bias output | + // ----+---------------------------+ + // (1) | float float float float | + // (2) | float int8 float float | hybrid + // (3) | uint8 uint8 int32 uint8 | quantized + // (4) | int8 int8 int32 int8 | quantized per channel + // (5) | int16 int8 int64 int16 | quantized per channel 16x8 + // + // We only support (1), (3) and (4) for now, and additionally the following: + // | input filter bias output | + // ----+---------------------------+ + // (5) | int16 int16 int64 int16 | + // + if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + } + else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } + else if (input()->element_type() == DataType::S8 && 
filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) == + filter()->scales().size()); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } + else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); + } + else + { + throw std::runtime_error("Unsupported type."); + } + LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + // Filter format: [1, H, W, O]. 
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t channels_out = filter_shape.dim(3); + + LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 && + bias()->shape().dim(0) == channels_out)); + + const int32_t output_height = + computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, + _params.dilation_height_factor); + const int32_t output_width = + computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width, + _params.dilation_width_factor); + + _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor, + input_height, filter_height, output_height); + _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width, + filter_width, output_width); + + output()->resize({batches, output_height, output_width, channels_out}); + + tflite::DepthwiseParams params{}; + + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + + auto scratchpad = getOutputTensors()[1]; + luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(), + getTensorShape(input()), getTensorShape(filter()), + getTensorShape(output())); +} + +void DepthwiseConv2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + if (filter()->element_type() == DataType::FLOAT32) + { + evalFloat(); + break; + } + throw std::runtime_error("Unsupported type."); + case DataType::U8: + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(3))); + evalQuantizedPerChannel(); + } + break; + case DataType::S8: + 
evalQuantizedS8PerChannel(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void DepthwiseConv2D::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::DepthwiseParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + params.depth_multiplier = _params.depth_multiplier; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + tflite::reference_ops::DepthwiseConv( + params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), + getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void DepthwiseConv2D::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const 
int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + const int32_t depth_multiplier = _params.depth_multiplier; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers_raw = + quantizeMultipliers(effective_output_scales); + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + for (int out_x = 0; out_x < output_width; ++out_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + for (int m = 0; m < depth_multiplier; ++m) + { + const int output_channel = m + in_channel * depth_multiplier; + const int in_x_origin = (out_x * stride_width) - _padding_width; + const int in_y_origin = (out_y * stride_height) - _padding_height; + int32 acc = 0; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + const int in_y = in_y_origin + dilation_height_factor * filter_y; + // Zero padding by omitting the areas outside the image. 
+ const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + if (is_point_inside_image) + { + int32 input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)]; + int32 filter_val = + filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)]; + acc += (filter_val - filter()->zero_points()[output_channel]) * + (input_val - input()->zero_point()); + } + } + } + if (bias_data) + { + acc += bias_data[output_channel]; + } + int32_t output_multiplier = quant_multipliers[output_channel].multiplier; + int output_shift = quant_multipliers[output_channel].shift; + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] = + static_cast<uint8_t>(scaled_acc); + } + } + } + } + } +} + +void DepthwiseConv2D::evalQuantized() const +{ + const auto input_scale = static_cast<double>(input()->scale()); + const auto filter_scale = static_cast<double>(filter()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_multiplier = input_scale * filter_scale / output_scale; + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::DepthwiseParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = 
_params.dilation_width_factor; + params.depth_multiplier = _params.depth_multiplier; + // The kernel expects input and filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = -filter()->zero_point(); // Note the '-'. + params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_ops::DepthwiseConv( + params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()), + getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +void DepthwiseConv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::DepthwiseParams params{}; + + params.padding_type = tflite::PaddingType::kSame; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + params.depth_multiplier = _params.depth_multiplier; + // The kernel expects input and filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. 
+ params.weights_offset = 0; + params.output_offset = output()->zero_point(); + params.output_multiplier = 1; // unused in tflite code + params.output_shift = 0; // unused in tflite code + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector<int32_t> shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector<int32_t> multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + if (scratchpad->is_allocatable()) + scratchpad_data = scratchpad->data<int8_t>(); + + luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void DepthwiseConv2D::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + const auto *filter_data = getTensorData<int16_t>(filter()); + const auto *bias_data = getTensorData<int64_t>(bias()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t 
input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + const int32_t dilation_height_factor = _params.dilation_height_factor; + const int32_t dilation_width_factor = _params.dilation_width_factor; + const int32_t depth_multiplier = _params.depth_multiplier; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers_raw = + quantizeMultipliers(effective_output_scales); + + BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + for (int32_t m = 0; m < depth_multiplier; ++m) + { + const int32_t out_c = m + in_c * depth_multiplier; + const int32_t in_y_origin = out_y * stride_height - _padding_height; + const int32_t in_x_origin = out_x * stride_width - _padding_width; + int64_t acc = 0; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t in_y = in_y_origin + dilation_height_factor * filter_y; + const int32_t in_x = in_x_origin + dilation_width_factor * filter_x; + if ((in_y >= 0 && in_y < 
input_height) && (in_x >= 0 && in_x < input_width)) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)]; + acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); + } + } + } + if (bias_data != nullptr) + { + acc += bias_data[out_c]; + } + + int32_t output_multiplier = quant_multipliers[out_c].multiplier; + int output_shift = quant_multipliers[out_c].shift; + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h new file mode 100644 index 000000000..3d1faf6c1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H +#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams> +{ +public: + DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, + Tensor *scratchpad, const DepthwiseConv2DParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *filter() const { return _inputs[1]; } + const Tensor *bias() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; + void evalQuantizedS16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp new file mode 100644 index 000000000..6b4673f3e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/DepthwiseConv2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DepthwiseConv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(DepthwiseConv2DTest, Float) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(scratchpad); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, 
// + }; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); +} + +TEST_F(DepthwiseConv2DTest, Uint8) +{ + std::vector<float> input_data{ + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + std::vector<float> ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + 
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); +} + +TEST_F(DepthwiseConv2DTest, SInt16) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, 4}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S64, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights) +{ + const int output_channels = 4; + 
Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, 0, 99, 0, // + 167, 0, 227, 28, // + }; + + float input_scale = 0.25; + std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f}; + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_scales[i] * input_scale); + std::vector<int32_t> zerop(4, 0); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + Tensor scratchpad(DataType::S16, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); 
+} + +TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15)); + filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + 
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(1, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0)); + + 
std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + Tensor scratchpad(DataType::S8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; 
+ std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<int32_t> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + Tensor scratchpad(DataType::U8, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; 
+ params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{4, 2, 2}; + Shape filter_shape{2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{2, 1, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + 
std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 4, 2}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, ""); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + 
params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp new file mode 100644 index 000000000..96399e5c7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Dequantize.h" +#include "kernels/Utils.h" +#include "PALDequantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Dequantize::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 || + input()->element_type() == loco::DataType::U8 || + input()->element_type() == loco::DataType::S16); + + LUCI_INTERPRETER_CHECK(input()->scales().size() == 1); + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); + + output()->resize(input()->shape()); +} + +void Dequantize::execute() const +{ + tflite::DequantizationParams op_params; + op_params.zero_point = input()->zero_point(); + op_params.scale = input()->scale(); + + switch (input()->element_type()) + { + case loco::DataType::U8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case loco::DataType::S8: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()), + getTensorData<int16_t>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h new file mode 100644 index 000000000..5565df0e4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h @@ -0,0 +1,43 
@@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Dequantize : public Kernel +{ +public: + Dequantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp new file mode 100644 index 000000000..0cab633d6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Dequantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DequantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(DequantizeTest, Uint8) +{ + std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint8) +{ + std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, ""); + + 
_memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, Sint16) +{ + std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + _memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(DequantizeTest, InvalidInputType_NEG) +{ + std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidOutputType_NEG) +{ + std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131}; + + Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); + + 
_memory_manager->allocate_memory(input_tensor); + input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t)); + + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Dequantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp new file mode 100644 index 000000000..dd1532278 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/div.h> +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params) + : KernelWithParams<DivParams>({input1, input2}, {output}, params) +{ +} + +void Div::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Div::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Div::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Div::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + 
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +void Div::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_output_multiplier = input1_scale / (input2_scale * output_scale); + + int32_t output_multiplier{}; + int output_shift{}; + + quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input2_offset = -input2()->zero_point(); // Note the '-'. 
+ params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()), + getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.h b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h new file mode 100644 index 000000000..c1bf3e10b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_DIV_H +#define LUCI_INTERPRETER_KERNELS_DIV_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Div : public KernelWithParams<DivParams> +{ +public: + Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DIV_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp new file mode 100644 index 000000000..85cd8b90a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class DivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +float GetTolerance(float min, float max) +{ + const float kQuantizedStep = (max - min) / 255.0f; + const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST_F(DivTest, Float) +{ + Shape base_shape = {2, 3, 1, 1}; + + std::vector<int32_t> output_shape = {2, 3, 1, 1}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; + std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST_F(DivTest, FloatBroadcast) +{ + Shape input1_shape = {1, 3}; + Shape input2_shape = {3, 1}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f}; + std::vector<float> input2_data{0.2f, 1.6f, 0.5f}; + std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; + + Tensor 
input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST_F(DivTest, Uint8) +{ + Shape base_shape = {1, 2, 2, 1}; + + std::vector<int32_t> output_shape = {1, 2, 2, 1}; + + std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f}; + std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f}; + std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f}; + + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f); + + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get()); + + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(test_outputs, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager 
*memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, // + 10, 0, 4, 5, 20, 0, // + 0, 0, 0, 2, 0, 0, // + 2, 0, 1, 10, 5, 0, // + 2, 3, 1, 0, 5, 1, // + 18, 20, 7, 0, 37, 10}, + {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10}, + {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0, + 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10}, + {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}}; + std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100}; + std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(DivTest, SInt64) +{ + checkInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, SInt32) +{ + checkInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(DivTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, 
&output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(DivTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(DivTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp new file mode 100644 index 000000000..697d63be4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Elu.h" +#include "kernels/Utils.h" + +#include "PALElu.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Elu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Elu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h new file mode 100644 index 000000000..c844ab57f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_ELU_H +#define LUCI_INTERPRETER_KERNELS_ELU_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Elu : public Kernel +{ +public: + Elu(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp new file mode 100644 index 000000000..814499cdb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Elu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Elu kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + (void)output_shape; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); +} + +TEST(EluTest, SimpleElu) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }, + /*output_data=*/ + { + 0.0, -0.997521, 2.0, -0.981684, // + 3.0, -0.864665, 10.0, -0.0951626, // + }); +} + +TEST(EluTest, InOutTypeMismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Elu kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp new file mode 100644 index 000000000..a57e127b7 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Equal.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Equal::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Equal::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Equal::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = 
getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void Equal::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Equal::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + 
{ + tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h new file mode 100644 index 000000000..c9be32cc0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Equal : public Kernel +{ +public: + Equal(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp new file mode 100644 index 000000000..5870e5460 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Equal.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class EqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(EqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + false, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(EqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + false, false, false, // Row 2 + false, false, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, 
&y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + false, true, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor 
x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(EqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(EqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. +const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(EqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, false, false, // Row 1 + false, true, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, 
&output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(EqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, false, false, false, // Row 1 + false, false, true, false, // Row 2 + false, false, false, false, // Row 3 + true, true, true, true, // Row 4 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(EqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = 
makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(EqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp new file mode 100644 index 000000000..e7c560a88 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Exp.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/exp.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Exp::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Exp::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Exp::evalFloat() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h new file mode 100644 index 000000000..429177375 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXP_H +#define LUCI_INTERPRETER_KERNELS_EXP_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Exp : public Kernel +{ +public: + Exp(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXP_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp new file mode 100644 index 000000000..a159d9db9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Exp.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(ExpTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{1, 1, 7}; + std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Exp kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{1, 1, 7}; + std::vector<float> ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f), + std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f), + std::exp(-0.01f)}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp new file mode 100644 index 000000000..ba35c99fa --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output) + : Kernel({input, axis}, {output}) +{ +} + +void ExpandDims::configure() +{ + int32_t axis_value; + + switch (axis()->element_type()) + { + case loco::DataType::S32: + axis_value = *getTensorData<int32_t>(axis()); + break; + case loco::DataType::S64: + axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis())); + break; + default: + throw std::runtime_error("Unsupported type."); + } + + const auto input_shape = input()->shape(); + + if (axis_value < 0) + { + axis_value += input_shape.num_dims() + 1; + } + + LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0); + + Shape output_shape(input_shape.num_dims() + 1); + for (int32_t i = 0; i < output_shape.num_dims(); ++i) + { + if (i < axis_value) + { + output_shape.dim(i) = input_shape.dim(i); + } + else if (i == axis_value) + { + output_shape.dim(i) = 1; + } + else + { + LUCI_INTERPRETER_CHECK(i >= 1); + output_shape.dim(i) = input_shape.dim(i - 1); + } + } + + output()->resize(output_shape); +} + +void ExpandDims::execute() const +{ + // Just copy input to output + const auto *input_data = input()->data<void>(); + auto *output_data = output()->data<void>(); + + const size_t element_size = getDataTypeSize(input()->element_type()); + const int32_t num_elements = input()->shape().num_elements(); + std::memcpy(output_data, input_data, num_elements * element_size); +} 
+ +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h new file mode 100644 index 000000000..e510b1160 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H +#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ExpandDims : public Kernel +{ +public: + ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axis() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp new file mode 100644 index 000000000..df9eaccc0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2022 
Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ExpandDims.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ExpandDimsTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ExpandDimsTest, PositiveAxis) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {0}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2})); +} + +TEST_F(ExpandDimsTest, NegAxis) +{ + std::vector<int32_t> input_data{-1, 1, 
-2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {-1}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1})); +} + +TEST_F(ExpandDimsTest, InvalidAxisType_NEG) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<float> axis_value = {1.0}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ExpandDimsTest, InvalidAxisValue_NEG) +{ + std::vector<int32_t> input_data{-1, 1, -2, 2}; + std::initializer_list<int32_t> input_shape = {2, 2}; + + std::initializer_list<int32_t> axis_value = {3}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp new file mode 100644 index 000000000..e09d6331a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Fill.h" +#include "kernels/Utils.h" +#include "tensorflow/lite/kernels/internal/reference/reference_ops.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output) + : Kernel({dims, value}, {output}) +{ +} + +template <typename T> void Fill::configureShape() +{ + const auto dims_data = getTensorData<T>(dims()); + Shape output_shape(dims()->shape().dim(0)); + + for (int i = 0; i < output_shape.num_dims(); ++i) + { + T data = dims_data[i]; + if (data < 0) + throw std::runtime_error("Fill dimensions must be >= 0"); + + output_shape.dim(i) = data; + } + + output()->resize(output_shape); +} + +void Fill::configure() +{ + const auto dims_shape = dims()->shape(); + const auto value_shape = value()->shape(); + + // Make sure the 1st input tensor is 1-D + LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1); + + // Make sure the 1st input tensor is int32 or int64 + LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or + dims()->element_type() == DataType::S64); + + // Make sure the 
2nd input tensor is a scalar + LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0) + + // Check zero point and scale for S16 and S8 + if (value()->element_type() == loco::DataType::S16 or + value()->element_type() == loco::DataType::S8) + { + LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale()); + LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point()); + + if (value()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(value()->zero_point() == 0); + } + // Resize output + switch (dims()->element_type()) + { + case DataType::S32: + configureShape<int32_t>(); + break; + case DataType::S64: + configureShape<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Fill::execute() const +{ + switch (output()->element_type()) + { + case DataType::S8: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()), + getTensorShape(output()), getTensorData<int8_t>(output())); + break; + case DataType::S16: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()), + getTensorShape(output()), getTensorData<int16_t>(output())); + break; + case DataType::S32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()), + getTensorShape(output()), getTensorData<int32_t>(output())); + break; + case DataType::S64: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()), + getTensorShape(output()), getTensorData<int64_t>(output())); + break; + case DataType::FLOAT32: + tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()), + getTensorShape(output()), getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h new file mode 100644 index 
000000000..184f0cb83 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FILL_H +#define LUCI_INTERPRETER_KERNELS_FILL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Fill : public Kernel +{ +public: + Fill(const Tensor *dims, const Tensor *value, Tensor *output); + + const Tensor *dims() const { return _inputs[0]; } + const Tensor *value() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void configureShape(); +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FILL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp new file mode 100644 index 000000000..cf56df507 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Fill.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FillTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector<int32_t> dims_data = {2, 3}; + std::vector<T> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data); + + std::vector<int32_t> ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager) +{ + Shape dims_shape{2}; + + std::vector<int32_t> dims_data = {2, 3}; + std::vector<float> value_data = {5}; + + int32_t zero_point = 0; + + if (DT == loco::DataType::S8) + zero_point = 1; + + Tensor dims = 
makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager); + Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point, + value_data, memory_manager); + + Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + + std::vector<int32_t> ref_output_shape{2, 3}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, FillInt) +{ + // Run for int32_t input + runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get()); + // Run for int64_t input + runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get()); + // Run for int8_t input + runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get()); + // Run for int16_t input + runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get()); + + SUCCEED(); +} + +TEST_F(FillTest, FillFloat) +{ + Shape dims_shape{3}; + + std::vector<int64_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5}; + + std::vector<int32_t> ref_output_shape{2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FillTest, Invalid_Input_Shape_NEG) +{ + Shape dims_shape{1, 3}; + + std::vector<int32_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = + makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FillTest, Invalid_Value_Shape_NEG) +{ + Shape dims_shape{3}; + + std::vector<int32_t> dims_data = {2, 2, 2}; + std::vector<float> value_data = {5}; + + Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get()); + Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + Fill kernel(&dims, &value, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp new file mode 100644 index 000000000..e3c4246cc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Floor.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/floor.h> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Floor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Floor::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Floor::evalFloat() const +{ + tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h new file mode 100644 index 000000000..ca3ad5997 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Floor : public Kernel +{ +public: + Floor(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp new file mode 100644 index 000000000..30076fb54 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Floor.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FloorTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorTest, SimpleFloat) +{ + std::initializer_list<int32_t> input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0.2, 8.6, 2.4, 4.3, // Row 1 + 3, 7.1, 10.5, -0.9, // Row 2 + }; + + std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1}; + std::vector<float> ref_output_data{ + 0, 8, 2, 4, // Row 1 + 3, 7, 10, -1, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Floor kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Floor kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp new file mode 100644 index 000000000..a7a10a336 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/FloorDiv.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output) + : Kernel({input, alpha}, {output}) +{ +} + +void FloorDiv::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void FloorDiv::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void FloorDiv::evalFloat() const +{ + auto FloorDivFunc = [](float x, float y) -> float { + return std::floor(static_cast<double>(x) / static_cast<double>(y)); + }; + + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + + // Check the denominator + for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i) + { + LUCI_INTERPRETER_CHECK(y_data[i] != 0); + } + + if (x()->shape() != y()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(x()), x_data, 
getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc); + } + else + { + tflite::reference_ops::BinaryFunction<float, float, float>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h new file mode 100644 index 000000000..e9c47d81a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class FloorDiv : public Kernel +{ +public: + FloorDiv(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp new file mode 100644 index 000000000..3e1b5f18e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/FloorDiv.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class FloorDivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorDivTest, FloatSimple) +{ + Shape x_shape{2, 3}; + std::vector<float> x_data{ + 0.5, 2.4, 3.1, // Row 1 + 1.9, -1.9, -2.8, // Row 2 + }; + + Shape y_shape = x_shape; + std::vector<float> y_data{ + 2.0, 0.5, 3.0, // Row 1 + 1.0, -1.0, -2.0, // Row 2 + }; + + std::vector<int32_t> ref_output_shape{2, 3}; + std::vector<float> ref_output_data{ + 0, 4, 1, // Row 1 + 1, 1, 1, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorDivTest, FloatBroadcast) +{ + Shape x_shape{1, 3}; + std::vector<float> x_data{ + 0.5, 2.4, -3.1, // Row 1 + }; + + Shape y_shape{3, 3}; + std::vector<float> y_data{ + 1.0, 1.0, 1.0, // Row 1 + 2.0, -0.5, -2.0, // Row 2 + 0.3, 0.7, 0.9, // Row 3 + }; + + std::vector<int32_t> ref_output_shape{3, 3}; + std::vector<float> ref_output_data{ + 0, 2, -4, // Row 1 + 0, -5, 1, // Row 2 + 1, 3, -4, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get()); + Tensor y_tensor = 
makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(FloorDivTest, DivByZero_NEG) +{ + Shape shape{3}; + std::vector<float> x_data{1, 0, -1}; + std::vector<float> y_data{0, 0, 0}; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(FloorDivTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp new file mode 100644 index 000000000..bd2bb2f35 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/FullyConnected.h" + +#include "kernels/Utils.h" + +#include "PALFullyConnected.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, + Tensor *output, const FullyConnectedParams ¶ms) + : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params) +{ +} + +void FullyConnected::configure() +{ + if (weights()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32) + } + else if (weights()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32) + } + else if 
(weights()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32) + } + else + { + throw std::runtime_error("Unsupported type."); + } + + const Shape &input_shape = input()->shape(); + const Shape &weights_shape = weights()->shape(); + + LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(bias() == nullptr || + bias()->shape().num_elements() == weights_shape.dim(0)); + + LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0); + const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1); + const int32_t num_units = weights_shape.dim(0); + + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0)); + + if (params().keep_num_dims == false) + { + output()->resize({batch_size, num_units}); + } + else + { + luci_interpreter::Shape output_shape(input_shape.num_dims()); + for (int i = 0; i < input_shape.num_dims(); ++i) + output_shape.dim(i) = input_shape.dim(i); + output_shape.dim(input_shape.num_dims() - 1) = num_units; + output()->resize(output_shape); + } +} + +void FullyConnected::execute() const +{ + switch (input()->element_type()) + { + case DataType::U8: + evalQuantized(); + break; + case DataType::S8: + evalQuantizedS8(); + break; + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void FullyConnected::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::FullyConnectedParams params{}; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault; + + 
tflite::reference_ops::FullyConnected( + params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()), + getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void FullyConnected::evalQuantized() const +{ + double real_multiplier = 0.0; + int output_shift; + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_multiplier; + real_multiplier = + getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale()); + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + calculateActivationRangeQuantized(params().activation, output(), &output_activation_min, + &output_activation_max); + + int32_t input_offset = -input()->zero_point(); + int32_t filter_offset = -weights()->zero_point(); + int32_t output_offset = output()->zero_point(); + + tflite::FullyConnectedParams op_params{}; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.lhs_cacheable = false; + op_params.rhs_cacheable = false; + tflite::reference_ops::FullyConnected( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()), + getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +void FullyConnected::evalQuantizedS8() const +{ + double real_multiplier = 0.0; + int output_shift; + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_multiplier; + real_multiplier = + getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), 
output()->scale()); + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + calculateActivationRangeQuantized(params().activation, output(), &output_activation_min, + &output_activation_max); + + int32_t input_offset = -input()->zero_point(); + int32_t filter_offset = -weights()->zero_point(); + int32_t output_offset = output()->zero_point(); + + tflite::FullyConnectedParams op_params{}; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.lhs_cacheable = false; + op_params.rhs_cacheable = false; + luci_interpreter_pal::FullyConnected<int8_t>( + op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()), + getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<int8_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h new file mode 100644 index 000000000..2a7c068c0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H +#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class FullyConnected : public KernelWithParams<FullyConnectedParams> +{ +public: + FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output, + const FullyConnectedParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *weights() const { return _inputs[1]; } + const Tensor *bias() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS8() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp new file mode 100644 index 000000000..4474cc4fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
#include "kernels/FullyConnected.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

namespace luci_interpreter
{
namespace kernels
{
namespace
{

using namespace testing;

// Runs a FullyConnected kernel with RELU activation on the given float data
// and compares the resulting shape and values with the expectation.
//
// This primary template always builds FLOAT32 tensors; the int8_t and uint8_t
// cases are handled by the explicit specializations below.
template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
           std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
           std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
  Tensor weights_tensor =
    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
  Tensor bias_tensor =
    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  FullyConnectedParams params{};
  params.activation = Activation::RELU;

  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
  kernel.configure();
  // Output storage is allocated only after configure() has run.
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
}

// S8 variant: the float reference data is passed through quantized S8
// tensors and the dequantized output is compared within a tolerance.
template <>
void Check<int8_t>(std::initializer_list<int32_t> input_shape,
                   std::initializer_list<int32_t> weights_shape,
                   std::initializer_list<int32_t> bias_shape,
                   std::initializer_list<int32_t> output_shape,
                   std::initializer_list<float> input_data,
                   std::initializer_list<float> weights_data,
                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  const float quantized_tolerance = getTolerance(-127, 128, 255);
  // Input and weights share the [-63.5, 64] quantization parameters; the
  // output uses [-127, 128].
  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
  Tensor input_tensor =
    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
                                  input_data, memory_manager.get());
  Tensor weights_tensor =
    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
                                  weights_data, memory_manager.get());
  // Bias is S32 with scale = input scale * weights scale (both equal to
  // input_quant_param.first here) and zero point 0.
  Tensor bias_tensor =
    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
                                   bias_data, memory_manager.get());
  Tensor output_tensor =
    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);

  FullyConnectedParams params{};
  params.activation = Activation::RELU;

  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(output_data, quantized_tolerance));
}

// U8 variant: identical to the S8 variant except for the tensor data type.
template <>
void Check<uint8_t>(
  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  const float quantized_tolerance = getTolerance(-127, 128, 255);
  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
  Tensor input_tensor =
    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
                                  input_data, memory_manager.get());
  Tensor weights_tensor =
    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
                                  weights_data, memory_manager.get());
  Tensor bias_tensor =
    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
                                   bias_data, memory_manager.get());
  Tensor output_tensor =
    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);

  FullyConnectedParams params{};
  params.activation = Activation::RELU;

  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(output_data, quantized_tolerance));
}

// Typed-test scaffolding: the Simple test below runs once per element type.
template <typename T> class FullyConnectedTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);

// 12 input elements against a {3, 6} weight matrix give 2 batches of 3 units.
// Negative pre-activation results are expected as 0 because Check() uses RELU.
TYPED_TEST(FullyConnectedTest, Simple)
{
  Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
                   {
                     -3, -5, 5, 4, 9, -2,  // batch = 0
                     -3, -2, -4, 9, -8, 1, // batch = 1
                   },
                   {
                     -3, -7, 4, -4, -6, 4, // unit = 0
                     3, 5, 2, 3, -3, -8,   // unit = 1
                     -3, 7, 4, 9, 0, -5,   // unit = 2
                   },
                   {-1, -5, -8},
                   {
                     0, 0, 32,   // batch = 0
                     22, 11, 47, // batch = 1
                   });
}

// FLOAT32 input/weights with an S32 bias must be rejected by configure().
TEST(FullyConnectedTest, InvalidBiasType_NEG)
{
  Shape input_shape{3, 2, 2, 1};
  std::vector<float> input_data{
    -3, -5, 5,  4, 9,  -2, // batch = 0
    -3, -2, -4, 9, -8, 1,  // batch = 1
  };
  Shape weights_shape{3, 6};
  std::vector<float> weights_data{
    -3, -7, 4, -4, -6, 4,  // unit = 0
    3,  5,  2, 3,  -3, -8, // unit = 1
    -3, 7,  4, 9,  0,  -5, // unit = 2
  };
  Shape bias_shape{3};
  std::vector<int32_t> bias_data{-1, -5, -8};

  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
  Tensor weights_tensor =
    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  FullyConnectedParams params{};
  params.activation = Activation::RELU;

  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Weights with three dimensions ({1, 3, 6}) must be rejected by configure().
TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
{
  Shape input_shape{3, 2, 2, 1};
  std::vector<float> input_data{
    -3, -5, 5,  4, 9,  -2, // batch = 0
    -3, -2, -4, 9, -8, 1,  // batch = 1
  };
  Shape weights_shape{1, 3, 6};
  std::vector<float> weights_data{
    -3, -7, 4, -4, -6, 4,  // unit = 0
    3,  5,  2, 3,  -3, -8, // unit = 1
    -3, 7,  4, 9,  0,  -5, // unit = 2
  };
  Shape bias_shape{3};
  std::vector<float> bias_data{-1, -5, -8};

  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
  Tensor weights_tensor =
    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
  Tensor bias_tensor =
    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  FullyConnectedParams params{};
  params.activation = Activation::RELU;

  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}

// A 3-element bias against {6, 3} weights (6 units) must be rejected by configure().
TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
{
  Shape input_shape{3, 2, 2, 1};
  std::vector<float> input_data{
    -3, -5, 5,  4, 9,  -2, // batch = 0
    -3, -2, -4, 9, -8, 1,  // batch = 1
  };
  Shape weights_shape{6, 3};
  std::vector<float> weights_data{
    -3, -7, 4,  // unit = 0
    -4, -6, 4,  // unit = 1
    3,  5,  2,  // unit = 2
    3,  -3, -8, // unit = 3
    -3, 7,  4,  // unit = 4
    9,  0,  -5, // unit = 5
  };
  Shape bias_shape{3};
  std::vector<float> bias_data{-1, -5, -8};

  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
  Tensor weights_tensor =
    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
  Tensor bias_tensor =
    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  FullyConnectedParams params{};
  params.activation = Activation::RELU;

  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}

} // namespace
} // namespace kernels
} // namespace luci_interpreter
Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2021 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Gather.h" +#include "kernels/Utils.h" +#include "PALGather.h" + +#include <stdexcept> +#include <cassert> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output, + const GatherParams &gparams) + : KernelWithParams<GatherParams>({params, indices}, {output}, gparams) +{ +} + +void Gather::configure() +{ + if (params()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + } + else + { + throw std::runtime_error("Unsupported type."); + } + + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 || + indices()->element_type() == DataType::S64); + + // refer tensorflow/lite/kernels/gather.cc + + const Shape ¶ms_shape = params()->shape(); + const Shape &indices_shape = indices()->shape(); + + int axis = _params.axis; + if (axis < 0) + { + axis += params_shape.num_dims(); + } + LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims()); + + int batch_dims = _params.batch_dims; + // batch_dims should be in range: [-rank(indices), rank(indices)]. + // Negative batch_dims is added with rank of positions. 
+ if (batch_dims < 0) + { + batch_dims += indices_shape.num_dims(); + } + LUCI_INTERPRETER_CHECK(batch_dims <= axis); + LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims()); + LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims()); + for (int i = 0; i < batch_dims; ++i) + { + LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i)); + } + + const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims; + + Shape output_shape(num_dimensions); + int output_index = 0; + for (int i = 0; i < axis; ++i) + { + output_shape.dim(output_index++) = params_shape.dim(i); + } + for (int i = batch_dims; i < indices_shape.num_dims(); ++i) + { + output_shape.dim(output_index++) = indices_shape.dim(i); + } + for (int i = axis + 1; i < params_shape.num_dims(); ++i) + { + output_shape.dim(output_index++) = params_shape.dim(i); + } + output()->resize(output_shape); +} + +void Gather::execute() const +{ + switch (params()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Gather::evalFloat() const +{ + assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64); + + const auto params_data = getTensorData<float>(params()); + auto output_data = getTensorData<float>(output()); + + tflite::GatherParams tparams; + tparams.axis = _params.axis; + tparams.batch_dims = _params.batch_dims; + + if (indices()->element_type() == DataType::S32) + { + const auto indices_data = getTensorData<int32_t>(indices()); + + luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data, + getTensorShape(indices()), indices_data, + getTensorShape(output()), output_data); + } + else + { + const auto indices_data = getTensorData<int64_t>(indices()); + + luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data, + getTensorShape(indices()), 
indices_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h new file mode 100644 index 000000000..cc02d64fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H +#define LUCI_INTERPRETER_KERNELS_GATHER_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Gather : public KernelWithParams<GatherParams> +{ +public: + Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams); + + const Tensor *params() const { return _inputs[0]; } + const Tensor *indices() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GATHER_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp new file mode 100644 index 000000000..4b3dda708 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Gather.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GatherTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GatherTest, Simple) +{ + std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; + std::vector<int32_t> indices_data{1, 0, 1, 5}; + std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 1; + gparams.batch_dims = 0; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4})); +} + +TEST_F(GatherTest, Simple_Batch) +{ + Shape params_shape = {3, 5}; + Shape indices_shape = {3, 2}; + std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.}; + std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3}; + std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get()); + Tensor indices_tensor = + makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 1; + gparams.batch_dims = 1; 
+ + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2})); +} + +TEST_F(GatherTest, Simple_NEG) +{ + Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GatherTest, Axis_NEG) +{ + Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 100; + gparams.batch_dims = 0; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GatherTest, Batch_NEG) +{ + std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; + std::vector<int32_t> indices_data{1, 0, 1, 5}; + std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f}; + + Tensor params_tensor = + makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get()); + Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + GatherParams gparams; + + gparams.axis = 0; + gparams.batch_dims = 1; + + Gather kernel(¶ms_tensor, &indices_tensor, &output_tensor, gparams); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter 
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp new file mode 100644 index 000000000..5ccae3c38 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Greater.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Greater::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Greater::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + 
default: + throw std::runtime_error("Unsupported type."); + } +} + +void Greater::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void Greater::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Greater::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != 
y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h new file mode 100644 index 000000000..065f76d7b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H +#define LUCI_INTERPRETER_KERNELS_GREATER_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Greater : public Kernel +{ +public: + Greater(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GREATER_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp new file mode 100644 index 000000000..a48080124 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Greater.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GreaterTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GreaterTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, // Row 1 + true, false, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(GreaterTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, false, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + true, true, true, // Row 2 + true, false, false, // Row 3 + false, false, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, 
x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. +const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(GreaterTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, false, // Row 1 + true, true, false, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, 
quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, 
_memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp new file mode 100644 index 000000000..27e42c971 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/GreaterEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output) + : Kernel({x, y}, {output}) +{ +} + +void GreaterEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void GreaterEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void GreaterEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void GreaterEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto 
output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +void GreaterEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h new file mode 100644 index 000000000..e333c30a6 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class GreaterEqual : public Kernel +{ +public: + GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp new file mode 100644 index 000000000..35bf88eab --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/GreaterEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class GreaterEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GreaterEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(GreaterEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, 
// Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename 
loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value - 1, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + true, true, true, // Row 2 + true, false, false, // Row 3 + false, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(GreaterEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(GreaterEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(GreaterEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, 
y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, false, // Row 1 + true, true, true, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(GreaterEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp new file mode 100644 index 000000000..971708bca --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp @@ -0,0 +1,94 @@ +/* + 
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/If.h" +#include "kernels/Utils.h" + +#include <cstring> + +namespace luci_interpreter +{ +namespace kernels +{ + +static std::vector<const Tensor *> joinInputs(const Tensor *cond, + const std::vector<const Tensor *> &inputs) +{ + std::vector<const Tensor *> result{cond}; + result.insert(result.cend(), inputs.cbegin(), inputs.cend()); + return result; +} + +If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs, + RuntimeGraph *then_graph, RuntimeGraph *else_graph) + : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph), + _else_graph(else_graph) +{ +} + +void If::configure() +{ + LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL); + LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1); + + for (RuntimeGraph *graph : {_then_graph, _else_graph}) + { + (void)graph; + LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1); + LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size()); + } +} + +void If::execute() const +{ + const bool cond_value = cond()->data<bool>()[0]; + + RuntimeGraph *active_graph = cond_value ? 
_then_graph : _else_graph; + const auto &graph_inputs = active_graph->getInputTensors(); + const auto &graph_outputs = active_graph->getOutputTensors(); + + // Copy kernel inputs to active graph inputs. + for (size_t i = 0; i < getInputTensors().size() - 1; ++i) + { + LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type()); + graph_inputs[i]->resize(input(i)->shape()); + + const int32_t num_elements = input(i)->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(input(i)->element_type()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(graph_inputs[i]); + std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size); + } + + active_graph->execute(); + + // Copy graph outputs to kernel outputs. + for (size_t i = 0; i < getOutputTensors().size(); ++i) + { + LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type()); + output(i)->resize(graph_outputs[i]->shape()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(output(i)); + + const int32_t num_elements = output(i)->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(output(i)->element_type()); + std::memcpy(output(i)->data<void>(), graph_outputs[i]->data<void>(), + num_elements * element_size); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.h b/compiler/luci-micro/luci-interpreter/src/kernels/If.h new file mode 100644 index 000000000..fa6ab371a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_IF_H +#define LUCI_INTERPRETER_KERNELS_IF_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class If : public Kernel +{ +public: + If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs, + RuntimeGraph *then_graph, RuntimeGraph *else_graph); + + const Tensor *cond() const { return _inputs[0]; } + const Tensor *input(int index) const { return _inputs[1 + index]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + RuntimeGraph *const _then_graph; + RuntimeGraph *const _else_graph; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_IF_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp new file mode 100644 index 000000000..c5f4faf75 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/RuntimeModule.h" +#include "kernels/Add.h" +#include "kernels/If.h" +#include "kernels/Mul.h" +#include "kernels/TestUtils.h" + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class IfTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input1 = graph->addTensor( + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *input2 = graph->addTensor( + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *output = graph->addTensor( + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input1, input2}); + graph->setOutputTensors({output}); + + AddParams params{}; + params.activation = Activation::NONE; + graph->addKernel(std::make_unique<Add>(input1, input2, output, params)); + + return graph; +} + +RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input1 = graph->addTensor( + 
std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *input2 = graph->addTensor( + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + Tensor *output = graph->addTensor( + std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input1, input2}); + graph->setOutputTensors({output}); + + MulParams params{}; + params.activation = Activation::NONE; + graph->addKernel(std::make_unique<Mul>(input1, input2, output, params)); + + return graph; +} + +TEST_F(IfTest, CondTrue) +{ + Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9})); +} + +TEST_F(IfTest, CondFalse) +{ + Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = 
buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14})); +} + +TEST_F(IfTest, InvalidCondType_NEG) +{ + Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(IfTest, InvalidCondElementNum_NEG) +{ + Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp new file mode 100644 index 000000000..22a329be6 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/InstanceNorm.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/common.h> +#include <cmath> + +namespace luci_interpreter +{ +namespace kernels +{ + +InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, + Tensor *output, const InstanceNormParams &params) + : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params) +{ +} + +void InstanceNorm::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) || + gamma()->shape().dim(0) == 1); + LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) || + beta()->shape().dim(0) == 1); + output()->resize(input()->shape()); +} + +void InstanceNorm::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; 
+ default: + throw std::runtime_error("Unsupported type."); + } +} + +void InstanceNorm::evalFloat() const +{ + float activation_min, activation_max; + calculateActivationRange(params().activation, &activation_min, &activation_max); + auto input_shape = getTensorShape(input()); + auto output_shape = getTensorShape(output()); + const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0); + const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1); + const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2); + const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3); + const float *input_data = getTensorData<float>(input()); + const float *gamma_data = getTensorData<float>(gamma()); + auto gamma_shape = getTensorShape(gamma()); + bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1; + const float *beta_data = getTensorData<float>(beta()); + auto beta_shape = getTensorShape(beta()); + bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1; + float *output_data = getTensorData<float>(output()); + for (int32_t batch = 0; batch < batches; batch++) + { + for (int32_t channel = 0; channel < channels; channel++) + { + double sum = 0.0f; + double square_sum = 0.0f; + int32_t size = heights * widths; + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)]; + sum += input_val; + square_sum += (input_val * input_val); + } + } + double mean = sum / size; + double var = square_sum / size - mean * mean; + + double gamma = single_gamma ? gamma_data[0] : gamma_data[channel]; + double beta = single_beta ? 
beta_data[0] : beta_data[channel]; + double a = gamma / (std::sqrt(var + params().epsilon)); + double b = -mean * a + beta; + + for (int32_t height = 0; height < heights; height++) + { + for (int32_t width = 0; width < widths; width++) + { + double input_value = + input_data[tflite::Offset(output_shape, batch, height, width, channel)]; + double output_value = input_value * a + b; + output_data[tflite::Offset(output_shape, batch, height, width, channel)] = + tflite::ActivationFunctionWithMinMax((float)output_value, activation_min, + activation_max); + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h new file mode 100644 index 000000000..a70a84e0a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H +#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class InstanceNorm : public KernelWithParams<InstanceNormParams> +{ +public: + InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output, + const InstanceNormParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *gamma() const { return _inputs[1]; } + const Tensor *beta() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp new file mode 100644 index 000000000..04400c3c0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "kernels/InstanceNorm.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class InstanceNormTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(InstanceNormTest, Simple) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); +} + +TEST_F(InstanceNormTest, Single_gamma_beta) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2})); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2})); +} + +TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + InstanceNormParams params{}; + params.epsilon = 0.1f; + params.activation = Activation::NONE; + + InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp new file mode 100644 index 000000000..64222953f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/L2Normalize.h" +#include "kernels/Utils.h" + +#include "PALL2Normalize.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params) + : KernelWithParams<L2NormParams>({input}, {output}, params) +{ +} + +void L2Normalize::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (output()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.)); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 128); + } + LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE); + output()->resize(input()->shape()); +} + +void L2Normalize::execute() const +{ + switch (output()->element_type()) + { + case DataType::FLOAT32: + eval<float>(0); + break; + case DataType::U8: + eval<uint8_t>(input()->zero_point()); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void L2Normalize::eval(int32_t zero_point) const +{ + tflite::L2NormalizationParams op_params{}; + op_params.input_zero_point = zero_point; + luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()), + getTensorData<T>(input()), getTensorShape(output()), + getTensorData<T>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h new file mode 100644 index 000000000..6c7dac698 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H +#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class L2Normalize : public KernelWithParams<L2NormParams> +{ +public: + L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void eval(int32_t zero_point) const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp new file mode 100644 index 000000000..6f960e8b4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernels/L2Normalize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::pair<float, int32_t> quant_param = + quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, + std::max(input_data) > 0 ? 
std::max(input_data) : 0.f); + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class L2NormalizeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(L2NormalizeTest, DataTypes); + +TYPED_TEST(L2NormalizeTest, Simple) +{ + Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, + {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}); +} + +TEST(L2NormalizeTest, ActivationType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + L2NormParams params{}; + params.activation = Activation::RELU6; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 64., 127); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp new file mode 100644 index 000000000..5a88808d5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/L2Pool2D.h" + +#include "kernels/Utils.h" + +#include "PALL2Pool2D.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params) + : KernelWithParams<Pool2DParams>({input}, {output}, params) +{ +} + +void L2Pool2D::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + int batches = input()->shape().dim(0); + int height = input()->shape().dim(1); + int width = input()->shape().dim(2); + int channels_out = input()->shape().dim(3); + + // Matching GetWindowedOutputSize in TensorFlow.
+ auto padding = params().padding; + int out_width, out_height; + out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1); + out_height = + computeOutputSize(padding, height, params().filter_height, params().stride_height, 1); + _padding_width = + computePadding(params().stride_width, 1, width, params().filter_width, out_width); + _padding_height = + computePadding(params().stride_height, 1, height, params().filter_height, out_height); + + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); + output()->resize({batches, out_height, out_width, channels_out}); +} + +void L2Pool2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + float activation_min, activation_max; + calculateActivationRange(params().activation, &activation_min, &activation_max); + tflite::PoolParams op_params; + op_params.stride_height = params().stride_height; + op_params.stride_width = params().stride_width; + op_params.filter_height = params().filter_height; + op_params.filter_width = params().filter_width; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; + op_params.float_activation_min = activation_min; + op_params.float_activation_max = activation_max; + luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h new file mode 100644 index 000000000..d40f5f478 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H +#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class L2Pool2D : public KernelWithParams<Pool2DParams> +{ +public: + L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + int32_t _padding_height = 0; + int32_t _padding_width = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp new file mode 100644 index 000000000..7245456cb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/L2Pool2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class L2Pool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(L2Pool2DTest, FloatNone) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{3.5, 6.5}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, FloatRelu) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + -1, -6, 2, 4, // + -3, -2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::RELU; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{3.53553, 6.5}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. +} + +TEST_F(L2Pool2DTest, FloatRelu1) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + -0.1, -0.6, 2, 4, // + -0.3, -0.2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::RELU_N1_TO_1; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.353553, 1.0}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, FloatRelu6) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + -0.1, -0.6, 2, 4, // + -0.3, -0.2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::RELU6; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.353553, 6.0}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. +} + +TEST_F(L2Pool2DTest, FloatPaddingSame) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::SAME; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{3.5, 6.5}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, FloatPaddingSameStride) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::SAME; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0}; + // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05 + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f)); + // TODO make a Shape checking of output_tensor. +} + +TEST_F(L2Pool2DTest, FloatPaddingValidStride) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{3.5, 6.0, 6.5}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + // TODO make a Shape checking of output_tensor. 
+} + +TEST_F(L2Pool2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp new file mode 100644 index 000000000..3833a55e8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LeakyRelu.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h> + +#include "PALLeakyRelu.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params) + : KernelWithParams<LeakyReluParams>({input}, {output}, params) +{ +} + +void LeakyRelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + double alpha_multiplier = input()->scale() * params().alpha / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + output()->resize(input()->shape()); +} + +void LeakyRelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LeakyRelu::evalFloat() const +{ + tflite::LeakyReluParams op_params{}; + op_params.alpha = params().alpha; + luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void LeakyRelu::evalQuantized() const +{ + tflite::LeakyReluParams op_params{}; +
op_params.input_offset = input()->zero_point(); + op_params.output_offset = output()->zero_point(); + op_params.output_multiplier_alpha = _output_multiplier_alpha; + op_params.output_shift_alpha = _output_shift_alpha; + op_params.output_multiplier_identity = _output_multiplier_identity; + op_params.output_shift_identity = _output_shift_identity; + + tflite::reference_ops::QuantizeLeakyRelu( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h new file mode 100644 index 000000000..e66f404df --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LEAKYRELU_H +#define LUCI_INTERPRETER_KERNELS_LEAKYRELU_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LeakyRelu : public KernelWithParams<LeakyReluParams> +{ +public: + LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _output_multiplier_alpha = 0; + int _output_shift_alpha = 0; + int32_t _output_multiplier_identity = 0; + int _output_shift_identity = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LEAKYRELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp new file mode 100644 index 000000000..0f6263b57 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "kernels/LeakyRelu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data, + float alpha) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + LeakyReluParams params{}; + params.alpha = alpha; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data, float alpha) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f); + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + LeakyReluParams params{}; + params.alpha = alpha; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + 
kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <typename T> class LeakReluTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(LeakReluTest, DataTypes); + +TYPED_TEST(LeakReluTest, Simple) +{ + Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -1.0f, // Row 2 + }, + /*alpha=*/0.5f); + + SUCCEED(); +} + +TEST(LeakReluTest, IvalidInputOutputType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LeakyReluParams params{}; + params.alpha = 0.5f; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp new file mode 100644 index 000000000..8d26ff297 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Less.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Less::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Less::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Less::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + 
getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +template <typename T> void Less::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void Less::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Less.h b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h new file mode 100644 index 000000000..e27bb689c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_H +#define LUCI_INTERPRETER_KERNELS_LESS_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Less : public Kernel +{ +public: + Less(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp new file mode 100644 index 000000000..8c5963363 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp @@ 
-0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Less.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LessTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray({2, 3})); +} + +TEST_F(LessTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType 
DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // Row 3 + true, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(LessTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, 
_memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, false, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, 
Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp new file mode 100644 index 000000000..b474bc47a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LessEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void LessEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void LessEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LessEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() 
!= y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void LessEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void LessEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), 
x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h new file mode 100644 index 000000000..f82ea90d4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LessEqual : public Kernel +{ +public: + LessEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp new file mode 100644 index 000000000..b2e2fa7a1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LessEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LessEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(LessEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + 
kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value + 1, -2, max_value}; + + std::vector<bool> ref_output_data{true, false, true}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -4, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value + 1, -2, max_value - 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + false, false, false, // Row 2 + false, true, true, // Row 3 + true, true, false, // Row 4 + }; + + Tensor x_tensor = 
makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(LessEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(LessEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. +const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(LessEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + 
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, true, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, 
quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(LessEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = 
makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(LessEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp new file mode 100644 index 000000000..a2bf442b0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LocalResponseNormalization.h" + +#include "kernels/Utils.h" + +#include "PALLocalResponseNormalization.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LocalResponseNormalization::LocalResponseNormalization( + const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params) + : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params) +{ +} + +void LocalResponseNormalization::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void LocalResponseNormalization::execute() const +{ + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::LocalResponseNormalizationParams op_params; + op_params.range = params().radius; + op_params.bias = params().bias; + op_params.alpha = params().alpha; + op_params.beta = params().beta; + luci_interpreter_pal::LocalResponseNormalization( + op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h new file mode 100644 index 000000000..60408a104 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H +#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams> +{ +public: + LocalResponseNormalization(const Tensor *input, Tensor *output, + const LocalResponseNormalizationParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp new file mode 100644 index 000000000..4a9d4739f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LocalResponseNormalization.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LocalResponseNormalizationTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LocalResponseNormalizationTest, SameAsL2Norm) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); +} + +TEST_F(LocalResponseNormalizationTest, WithAlpha) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 4.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, 
&output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})); +} + +TEST_F(LocalResponseNormalizationTest, WithBias) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 9.0; + params.alpha = 4.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})); +} + +TEST_F(LocalResponseNormalizationTest, SmallRadius) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 2; + params.bias = 9.0; + params.alpha = 4.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})); +} + +TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + 
params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp new file mode 100644 index 000000000..79c315338 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogSoftmax.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/log_softmax.h> + +#include "PALLogSoftmax.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogSoftmax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 255); + + tflite::SoftmaxParams params{}; + + params.table = _table; + params.beta = 1.0; + luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta); + } + output()->resize(input()->shape()); +} + +void LogSoftmax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LogSoftmax::evalFloat() const +{ + tflite::SoftmaxParams params{}; + tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void LogSoftmax::evalQuantized() const +{ + const auto input_shape = getTensorShape(input()); + const auto output_shape = getTensorShape(output()); + const auto input_scale = input()->scale(); + uint8_t *output_data = getTensorData<uint8_t>(output()); + const uint8_t *input_data = getTensorData<uint8_t>(input()); + const float beta = 1.0; + + tflite::SoftmaxParams params{}; + + params.table = const_cast<float *>(_table); + params.zero_point = output()->zero_point(); + params.scale = output()->scale(); + + luci_interpreter_pal::InitializeParams(&params, input_scale, beta); + luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); +} + +} // 
namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h new file mode 100644 index 000000000..18477fbe3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H +#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogSoftmax : public Kernel +{ +public: + LogSoftmax(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + + float _table[256]; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp new file mode 100644 index 000000000..50dcd5c28 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., 
Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogSoftmax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogSoftmaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogSoftmaxTest, Float) +{ + Shape input_shape{2, 4}; + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(LogSoftmaxTest, Uint8) +{ + float kMin = -10; + float kMax = 10; + float kLogSoftmaxQuantizedTolerance = 16. 
/ 256; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111})); +} + +TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG) +{ + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10); + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 20. 
/ 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp new file mode 100644 index 000000000..8e7263231 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalAnd.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void LogicalAnd::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void LogicalAnd::execute() const +{ + switch (input1()->element_type()) + { + case DataType::BOOL: + evalLogicalAnd(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +inline void LogicalAnd::evalLogicalAnd() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()), + getTensorShape(input2()), getTensorData<bool>(input2()), + getTensorShape(output()), getTensorData<bool>(output()), + [](bool x, bool y) { return x && y; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h new file mode 100644 index 000000000..46b889986 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H +#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalAnd : public Kernel +{ +public: + LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + inline void evalLogicalAnd() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp new file mode 100644 index 000000000..21b7951e0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalAnd.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalAndTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalAndTest, Basic) +{ + Shape input_shape{1, 1, 1, 4}; + Tensor input_tensor1 = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, false, false)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalAndTest, Broadcast) +{ + Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, false, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalAndTest, MismatchInputType_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, 
_memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalAndTest, InputTypeInvalid_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp new file mode 100644 index 000000000..65ab961aa --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalNot.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogicalNot::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void LogicalNot::execute() const +{ + switch (input()->element_type()) + { + case DataType::BOOL: + evalLogicalNot(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +inline void LogicalNot::evalLogicalNot() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + bool *output_data = getTensorData<bool>(output()); + const bool *input_data = getTensorData<bool>(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = !input_data[i]; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h new file mode 100644 index 000000000..1608fafa5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H +#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalNot : public Kernel +{ +public: + LogicalNot(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + inline void evalLogicalNot() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp new file mode 100644 index 000000000..3cbf27f6b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LogicalNot.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalNotTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalNotTest, Basic) +{ + Shape input_shape{1, 1, 1, 4}; + Tensor input_tensor = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalNot kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(false, true, true, false)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalNotTest, OutputTypeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalNot kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalNotTest, InputTypeInvalid_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalNot kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp new file mode 100644 index 000000000..f289ca64f --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogicalOr.h" + +#include "kernels/Utils.h" +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void LogicalOr::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL); + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void LogicalOr::execute() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()), + getTensorShape(input2()), getTensorData<bool>(input2()), + getTensorShape(output()), getTensorData<bool>(output()), + [](bool x, bool y) { return x || y; }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h new file mode 100644 index 000000000..88606483f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 
2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H +#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LogicalOr : public Kernel +{ +public: + LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp new file mode 100644 index 000000000..d65a69a5e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogicalOr.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class LogicalOrTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalOrTest, Basic) +{ + Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false}, + _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, true, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalOrTest, Broadcast) +{ + Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), + ::testing::ElementsAre(true, false, false, true)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); +} + +TEST_F(LogicalOrTest, MismatchInputType_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::S32); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(LogicalOrTest, InputTypeInvalid_NEG) +{ + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp new file mode 100644 index 000000000..58e4f185d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Logistic.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/logistic.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Logistic::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256); + populateLookupTable(); + } + output()->resize(input()->shape()); +} + +void Logistic::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Logistic::evalFloat() const +{ + tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void Logistic::evalQuantized() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + uint8_t *output_data = getTensorData<uint8_t>(output()); + const uint8_t *input_data = getTensorData<uint8_t>(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = getTableValue(input_data[i]); + } +} + +void Logistic::populateLookupTable() +{ + const auto input_scale = static_cast<double>(input()->scale()); + const auto input_zero_point = static_cast<int32_t>(input()->zero_point()); + const auto output_scale = 
static_cast<double>(output()->scale()); + const auto output_zero_point = static_cast<int32_t>(output()->zero_point()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + const float transformed = 1.0f / (1.0f + std::exp(-dequantized)); + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)), + static_cast<uint8_t>(val)); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h new file mode 100644 index 000000000..31de6adf0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H +#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Logistic : public Kernel +{ +public: + Logistic(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void populateLookupTable(); + void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }; + uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }; + +private: + uint8_t _table[256]{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp new file mode 100644 index 000000000..5a1ea669c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Logistic.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(getElementType<T>()); + + Logistic kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> input_quant_param = + quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 256, 0); + + Logistic kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale() * 2)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class LogisticTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(LogisticTest, DataTypes); + +TYPED_TEST(LogisticTest, Simple) +{ + Check<TypeParam>( + {89}, {89}, + {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636, + -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000, + -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364, + -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727, + -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091, + -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455, + -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818, + -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818, + 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455, + 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091, + 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727, + 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364, + 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000, + 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636, + 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000}, + {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 
0.0001126729, 0.0001414198, + 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786, + 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065, + 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576, + 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562, + 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805, + 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241, + 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759, + 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195, + 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438, + 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424, + 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935, + 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214, + 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802, + 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021}); +} + +TEST(LogisticTest, IvalidInputOutputType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape = {1}; + std::vector<float> input_data{10}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 256, 0); + + Logistic kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(LogisticTest, IvalidQuantParam_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape = {2}; + std::vector<float> input_data{-10, 10}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0); + + Logistic kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp new file mode 100644 index 000000000..8d9760ff2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MaxPool2D.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms) + : KernelWithParams<Pool2DParams>({input}, {output}, params) +{ +} + +void MaxPool2D::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + assert(input()->shape().num_dims() == 4); + const Shape &input_shape = input()->shape(); + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + + const int32_t output_height = + computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height); + const int32_t output_width = + computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width); + + _padding_height = + computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height); + _padding_width = + computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); + + output()->resize({batches, output_height, output_width, depth}); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } +} + +void MaxPool2D::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + 
evalQuantized(); + break; + case DataType::S16: + evalSInt16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void MaxPool2D::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void MaxPool2D::evalQuantized() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +void MaxPool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = 
_padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::MaxPool( + params, getTensorShape(input()), getTensorData<int16_t>(input()), // + getTensorShape(output()), getTensorData<int16_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h new file mode 100644 index 000000000..bb7666305 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H +#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class MaxPool2D : public KernelWithParams<Pool2DParams> +{ +public: + MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalSInt16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp new file mode 100644 index 000000000..44f2a222f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MaxPool2D.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MaxPool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MaxPool2DTest, Float) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector<float> input_data{ + 1, -1, 0, -2, 2, // + -7, -6, -5, -4, -3, // + 5, 4, 3, 6, 7, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + MaxPool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 1, 2, // + 5, 6, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MaxPool2DTest, Uint8) +{ + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375); + std::vector<float> input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + 
params.filter_width = 2; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + MaxPool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.0, 6.0}; + std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MaxPool2DTest, SInt16) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{ + 1, -1, 0, -2, 2, // + -7, -6, -5, -4, -3, // + 5, 4, 3, 6, 7, // + }; + std::vector<float> ref_output_data{ + 1, 2, // + 5, 6, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + MaxPool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp new file mode 100644 index 000000000..b102b5e27 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung 
Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Maximum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Maximum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Maximum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMaximum<float>(); + break; + case DataType::U8: + evalMaximum<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void Maximum::evalMaximum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), + [](T x, T y) { return std::max(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h new 
file mode 100644 index 000000000..3c99e69c7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H +#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Maximum : public Kernel +{ +public: + Maximum(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalMaximum() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp new file mode 100644 index 000000000..e4a505b03 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Maximum.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MaximumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MaximumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(MaximumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; + std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = 
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp new file mode 100644 index 000000000..8e65e0d6d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Mean.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reduce.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params) +{ + params->axis_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + params->axis[i] = static_cast<int16>(axes_data[i]); + } + for (int i = num_axes; i < 4; ++i) + { + params->axis[i] = 1; + } +} + +// Returns the number of axes that will be reduced. Removes duplicates. +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) +{ + int reduction_count = num_axes; + for (int i = 0; i < num_axes; ++i) + { + int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims; + assert(current >= 0 && current < input_num_dims); + for (int j = 0; j < i; j++) + { + int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims; + // This checks for duplicate axis + if (current == previous) + { + --reduction_count; + break; + } + } + } + return reduction_count; +} + +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, + bool keep_dims) +{ + int input_num_dims = input_shape.num_dims(); + if (input_num_dims == 0) + { + return Shape(0); + } + + if (keep_dims) + { + Shape output_shape(input_num_dims); + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + is_axis = true; + break; + } + } + if (is_axis) + { + output_shape.dim(idx) = 1; + } + else + { + output_shape.dim(idx) = input_shape.dim(idx); + } + } + return output_shape; + } + else + { + int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims); + Shape output_shape(input_num_dims - num_reduce_axes); + int 
num_skip_axes = 0; + for (int idx = 0; idx < input_num_dims; ++idx) + { + bool is_axis = false; + for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx) + { + if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx) + { + ++num_skip_axes; + is_axis = true; + break; + } + } + if (!is_axis) + { + output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx); + } + } + return output_shape; + } +} + +Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params) + : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum}, + params) +{ +} + +void Mean::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + assert(num_axes <= 4); + + Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims); + output()->resize(output_shape); + + tflite::MeanParams params{}; + resolveAxes(axes_data, num_axes, &params); + _need_temporaries = !( + _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && + ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1))); + if (_need_temporaries) + { + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); + temp_sum->resize(output()->shape()); + } + else + { + auto temp_index = getOutputTensors()[1]; + auto
resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + temp_index->set_allocatable(false); + resolved_axes->set_allocatable(false); + temp_sum->set_allocatable(false); + } +} + +void Mean::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Mean::evalFloat() const +{ + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + + tflite::MeanParams params{}; + resolveAxes(axes_data, num_axes, &params); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && + ((params.axis[0] == 1 && params.axis[1] == 2) || + (params.axis[0] == 2 && params.axis[1] == 1))) + { + tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); + } + else + { + tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData<float>(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<float>(temp_sum)); + } +} + +void Mean::evalQuantized() const +{ + const Shape &input_shape = input()->shape(); + int input_num_dims = input_shape.num_dims(); + const auto *axes_data = getTensorData<int32_t>(axes()); + int num_axes = axes()->shape().num_elements(); + + tflite::MeanParams params{}; + resolveAxes(axes_data, num_axes, &params); + + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && + ((params.axis[0] == 1 && params.axis[1] == 2) || + (params.axis[0] == 2 && params.axis[1] == 1))) + { + tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + input()->zero_point(), input()->scale(), getTensorShape(output()), + getTensorData<uint8_t>(output()), output()->zero_point(), + output()->scale()); + } + else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale()) + { + tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData<uint8_t>(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<int>(temp_sum)); + } + else + { + tflite::reference_ops::QuantizedMeanOrSum<>( + getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(), + getTensorShape(input()).DimsData(), input()->shape().num_dims(), + getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, + _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<int>(temp_sum), + /*compute_sum=*/false); + } +} + +void Mean::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &output_shape = output()->shape(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + const int num_axes = axes()->shape().num_elements(); + + constexpr int32_t output_min = -std::numeric_limits<int16_t>::max(); + constexpr int32_t output_max = std::numeric_limits<int16_t>::max(); + + // Defer to specialized implementation 
for 4D Mean across axes 1 & 2. + if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 && + ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1))) + { + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + assert(output_shape.num_dims() == 4); + assert(output_shape.dim(0) == batches); + assert(output_shape.dim(1) == 1); + assert(output_shape.dim(2) == 1); + assert(output_shape.dim(3) == depth); + + const double real_multiplier = + static_cast<double>(input()->scale()) / static_cast<double>(output()->scale()); + + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + const int32_t num_elements_in_axes = input_height * input_width; + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t c = 0; c < depth; ++c) + { + int32_t acc = 0; + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)]; + } + } + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + // Divide by the number of elements rounding to the nearest integer. + scaled_acc = scaled_acc > 0 + ? 
(scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes + : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes; + + scaled_acc = std::max(scaled_acc, output_min); + scaled_acc = std::min(scaled_acc, output_max); + + output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc; + } + } + } + else + { + throw std::runtime_error("Unsupported configuration."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h new file mode 100644 index 000000000..ed07ae561 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H +#define LUCI_INTERPRETER_KERNELS_MEAN_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <memory> + +namespace luci_interpreter +{ +namespace kernels +{ + +class Mean : public KernelWithParams<ReducerParams> +{ +public: + Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + bool _need_temporaries = false; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MEAN_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp new file mode 100644 index 000000000..d2c00935a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "kernels/Mean.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MeanTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MeanTest, FloatKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{0, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{10.5, 12.5, 14.5}; + std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, FloatKeepDims4DMean) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 
19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{6, 7, 18, 19}; + std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, FloatNotKeepDims) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + + std::vector<int32_t> axis_data{1, 0, -3, -3}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ReducerParams params{}; + params.keep_dims = false; + + Mean 
kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{12, 13}; + std::initializer_list<int32_t> ref_output_shape{2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, Uint8KeepDims) +{ + float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); + std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + + std::vector<int32_t> axis_data{1}; + Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::U8, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.3, 0.35, 0.55}; + std::initializer_list<int32_t> ref_output_shape{3, 1}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + 
FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, Uint8NotKeepDims) +{ + float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); + std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + + std::vector<int32_t> axis_data{1}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + ReducerParams params{}; + params.keep_dims = false; + + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.4, 0.4}; + std::initializer_list<int32_t> ref_output_shape{1, 2}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MeanTest, SInt16KeepDims4D) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + std::vector<int32_t> axes_data{1, 2}; + std::vector<float> ref_output_data{6, 7, 18, 19}; + + Tensor 
input_tensor = + makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get()); + Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); + kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp new file mode 100644 index 000000000..5d3dcde72 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Minimum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Minimum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Minimum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMinimum<float>(); + break; + case DataType::U8: + evalMinimum<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void Minimum::evalMinimum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), + [](T x, T y) { return std::min(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h new file mode 100644 index 000000000..5ff4035b4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H +#define LUCI_INTERPRETER_KERNELS_MINIMUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Minimum : public Kernel +{ +public: + Minimum(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalMinimum() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp new file mode 100644 index 000000000..9a143643f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Minimum.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MinimumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MinimumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(MinimumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; + std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = + makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + 
makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp new file mode 100644 index 000000000..bae1eac70 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MirrorPad.h" + +#include "kernels/Utils.h" + +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output, + const MirrorPadParams ¶ms) + : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params) +{ +} + +void MirrorPad::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + // Paddings shape should be [N, 2]. + assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +template <typename T> +inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output); + +void MirrorPad::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output()); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max()); + + MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output()); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> +inline void MirrorPadImpl(const 
Tensor &input, const Tensor &paddings, MirrorPadMode mode, + Tensor &output) +{ + auto const input_dims = input.shape().num_dims(); + auto const input_data = input.data<T>(); + auto const paddings_data = paddings.data<int32_t>(); + auto const output_data = output.data<T>(); + + auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1; + auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1; + auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1; + auto const input_d = input.shape().dim(input_dims - 1); + + auto const input_h_offset = input_d * input_w; + auto const input_b_offset = input_h_offset * input_h; + + auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1; + auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1; + auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1; + auto const output_d = output.shape().dim(input_dims - 1); + + auto const left_b_pad = paddings_data[2 * (input_dims - 4)]; + auto const left_h_pad = paddings_data[2 * (input_dims - 3)]; + auto const left_w_pad = paddings_data[2 * (input_dims - 2)]; + auto const left_d_pad = paddings_data[2 * (input_dims - 1)]; + + auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1]; + auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1]; + auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1]; + auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1]; + + const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; }; + const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h, + auto b) { + return d + w * input_d + h * input_h_offset + b * input_b_offset; + }; + + const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) { + bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1; + return positive_mod(reflected ? 
input + left_pad - i - 1 : i - left_pad, input); + }; + + const T *in_ptr = input_data; + T *out_ptr = output_data; + + for (int32_t b = 0; b < output_b; ++b) + { + for (int32_t h = 0; h < output_h; ++h) + { + for (int32_t w = 0; w < output_w; ++w) + { + for (int32_t d = 0; d < output_d; ++d) + { + if (b < left_b_pad || b >= output_b - right_b_pad || // + h < left_h_pad || h >= output_h - right_h_pad || // + w < left_w_pad || w >= output_w - right_w_pad || // + d < left_d_pad || d >= output_d - right_d_pad) + { + if (mode == MirrorPadMode::REFLECT) + { + *out_ptr++ = input_data[offset_index( + positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w), + positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))]; + } + else + { + *out_ptr++ = input_data[offset_index( + symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w), + symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))]; + } + } + else + { + *out_ptr++ = *in_ptr++; + } + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h new file mode 100644 index 000000000..d3e6e858a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H +#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class MirrorPad : public KernelWithParams<MirrorPadParams> +{ +public: + MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output, + const MirrorPadParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp new file mode 100644 index 000000000..740d8cb22 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/MirrorPad.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MirrorPadTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode) + { + MirrorPadParams params{}; + params.mode = mode; + + MirrorPad kernel(&input, &padding, &output, params); + kernel.configure(); + _memory_manager->allocate_memory(output); + kernel.execute(); + } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MirrorPadTest, FloatReflect) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector<float> input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, // + 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, // + 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; // + std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric) +{ + Shape input_shape = {1, 2, 2, 1}; + Shape padding_shape = {4, 2}; + + std::vector<float> input_data{1.0f, 2.0f, // + 3.0f, 4.0f}; // + std::vector<int> 
padding_data{0, 0, 2, 1, 1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 1.0, 1.0, 2.0, 2.0, 1.0, // + 3.0, 3.0, 4.0, 4.0, 3.0, // + 3.0, 3.0, 4.0, 4.0, 3.0}; // + std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, FloatSymmetric2Dim) +{ + Shape input_shape = {3, 1}; + Shape padding_shape = {2, 2}; + + std::vector<float> input_data{1.0f, 2.0f, 3.0f}; + std::vector<int> padding_data{1, 2, 0, 0}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0}; + std::initializer_list<int32_t> ref_output_shape{6, 1}; + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Reflect) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 
6.0f); + + std::vector<float> input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT); + + std::vector<float> ref_output_data{ + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, // + 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, Uint8Symmetric) +{ + Shape input_shape = {1, 2, 3, 1}; + Shape padding_shape = {4, 2}; + + float quant_tolerance = getTolerance(0.0f, 6.0f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f); + + std::vector<float> input_data{1.0f, 2.0f, 3.0f, // + 4.0f, 5.0f, 6.0f}; // + std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + + Tensor padding_tensor = + makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC); + + std::vector<float> 
ref_output_data{ + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, // + }; + std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1}; + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, quant_tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(MirrorPadTest, UnsupportedDim_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get()); + Tensor padding_tensor = + makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +TEST_F(MirrorPadTest, InvalidInputType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp new file mode 100644 index 000000000..531fb4fa1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Mul.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include "PALMul.h" + +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams ¶ms) + : KernelWithParams<MulParams>({input1, input2}, {output}, params) +{ +} + +void Mul::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type()); + if (input1()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 && + input2()->zero_points().size() == 1) + LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && + output()->zero_point() == 0); + } + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Mul::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Mul::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = 
tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Mul::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + luci_interpreter_pal::BroadcastMul4DSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +void Mul::evalQuantizedS16() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_multiplier = input1_scale * input2_scale / output_scale; + + int32_t output_multiplier; + int output_shift; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + auto fn = 
[output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val, + int16_t input2_val) { + int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val); + output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift); + output = std::max(output, activation_min); + output = std::min(output, activation_max); + return static_cast<int16_t>(output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()), + getTensorShape(input2()), getTensorData<int16_t>(input2()), + getTensorShape(output()), getTensorData<int16_t>(output()), fn); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h new file mode 100644 index 000000000..c0cf817df --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_MUL_H +#define LUCI_INTERPRETER_KERNELS_MUL_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <cstdint> +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class Mul : public KernelWithParams<MulParams> +{ +public: + Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantizedS16() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MUL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp new file mode 100644 index 000000000..fc0e60614 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Mul.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class MulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MulTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<float>> test_outputs = { + {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, + 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, + 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, + 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, + 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, + {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } +} + +template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + + dtype max_value = std::numeric_limits<dtype>::max(); + dtype res_max = max_value - max_value % 10; + + std::vector<std::vector<dtype>> test_outputs = { + {8, 0, 20, 0, 4, 30, // + 16, 0, 40, 3, 8, 0, // + 0, 0, 0, 6, 0, 0, // + 4, 0, 10, 9, 2, 0, // + 40, 0, 100, 0, 20, 150, // + 28, 0, 70, 0, 14, res_max}, + {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max}, + {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2, + 70, res_max}, + {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}}; + std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10}; + std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10}; + for (size_t i = 0; i < test_shapes.size(); ++i) + 
{ + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } + // Re-run with exchanged inputs. + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +} + +TEST_F(MulTest, SInt64) +{ + checkInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt32) +{ + checkInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(MulTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<int32_t>> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<std::vector<float>> ref_outputs = { + {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 
0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f, + 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f, + 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f}, + {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f, + 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f, + 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f}, + {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale() * 2; + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } + // Re-run with exchanged inputs and different scales. 
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0); + const float tolerance = output_tensor.scale() * 2; + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } +} + +TEST_F(MulTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(MulTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, 
_memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + MulParams params{}; + params.activation = Activation::RELU; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(MulTest, Invalid_Quantization_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + MulParams params{}; + params.activation = Activation::NONE; + + Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp new file mode 100644 index 000000000..c6fe08a9e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Neg.h" +#include "kernels/Utils.h" + +#include "PALNeg.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Neg::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + output()->resize(input()->shape()); +} + +void Neg::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Neg::evalFloat() const +{ + luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h new file mode 100644 index 000000000..69fa1a18e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_NEG_H +#define LUCI_INTERPRETER_KERNELS_NEG_H + +#include "core/Kernel.h" +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class Neg : public Kernel +{ +public: + Neg(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NEG_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp new file mode 100644 index 000000000..8b2bc1a82 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Neg.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<T> input_data, std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + Neg kernel(&input_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(NegTest, FloatSimple) +{ + Check<float>(/*input_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, -1.0f, -3.0f, // Row 1 + -1.0f, 1.0f, 2.0f, // Row 2 + }); + + SUCCEED(); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp new file mode 100644 index 000000000..54e5eee34 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void NotEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void NotEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void NotEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), 
y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +template <typename T> void NotEqual::evalInteger() const +{ + const auto x_data = getTensorData<T>(x()); + const auto y_data = getTensorData<T>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +void NotEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } + else + { + tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h new file mode 100644 index 000000000..d2aafe893 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class NotEqual : public Kernel +{ +public: + NotEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int _x_shift = 0; + int32_t _y_multiplier = 0; + int _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp new file mode 100644 index 000000000..45bf4022a --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class NotEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(NotEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + true, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), 
::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(NotEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + true, true, true, // Row 2 + true, true, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +template <loco::DataType DType> +void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{min_value, 2, max_value}; + + std::vector<dtype> y_data{min_value, -2, max_value}; + + std::vector<bool> ref_output_data{false, true, false}; + + Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), 
::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3})); +} + +template <loco::DataType DType> +void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + dtype min_value = std::numeric_limits<dtype>::min(); + dtype max_value = std::numeric_limits<dtype>::max(); + std::vector<dtype> x_data{ + min_value, 2, 3, // Row 1 + 4, 5, max_value, // Row 2 + -1, -2, -3, // Row 3 + min_value, -2, max_value, // Row 4 + }; + + std::vector<dtype> y_data{ + min_value, -2, max_value, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + true, false, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager); + Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +TEST_F(NotEqualTest, Int32) +{ + checkIntegerSimple<loco::DataType::S32>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(NotEqualTest, Int64) +{ + checkIntegerSimple<loco::DataType::S64>(_memory_manager.get()); + checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST_F(NotEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, true, true, // Row 1 + true, false, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(NotEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, true, true, // Row 1 + true, true, false, true, // Row 2 + true, true, true, true, // Row 3 + false, false, false, false, // Row 4 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor 
= makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST_F(NotEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Float_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int32_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +TEST_F(NotEqualTest, Int64_Broadcast_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + ASSERT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp new file mode 100644 index 000000000..4d3e5f2ef --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/OneHot.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template <typename T> +void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor, + const Tensor *off_value_tensor, int32_t depth, int32_t axis, + Tensor *output_tensor) +{ + // define input shape and correct axis + auto const &input_shape = indices_tensor->shape(); + axis = axis == -1 ? input_shape.num_dims() : axis; + + // TODO support other integer input types + auto const *indices = getTensorData<int32_t>(indices_tensor); + auto const on_value = getTensorData<T>(on_value_tensor)[0]; + auto const off_value = getTensorData<T>(off_value_tensor)[0]; + auto *output = getTensorData<T>(output_tensor); + + // prefix_dim_size == # of elements before the axis + // depth == # of elements per axis + // suffix_dim_size == # of elements after the axis + auto prefix_dim_size = 1; + for (int32_t i = 0; i < axis; ++i) + { + prefix_dim_size *= input_shape.dim(i); + } + assert(prefix_dim_size > 0); + auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size; + + // View the indices as a matrix of size: + // prefix_dim_size x suffix_dim_size + // View the output as a matrix of size: + // prefix_dim_size x depth x suffix_dim_size + // Then the output is: + // output(i, j, k) == (indices(i, k) == j) ? on : off + for (int32_t i = 0; i < prefix_dim_size; ++i) + for (int32_t j = 0; j < depth; ++j) + for (int32_t k = 0; k < suffix_dim_size; ++k, ++output) + *output = indices[i * suffix_dim_size + k] == j ? 
on_value : off_value; +} + +} // namespace + +OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams &params) + : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params) +{ + // Do nothing +} + +void OneHot::configure() +{ + // check types + LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type()); + + // check shape dependent parameters + LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims()); + + // define parameters that affect the output shape + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const &input_shape = indices()->shape(); + auto const input_dims = input_shape.num_dims(); + auto const axis = params().axis == -1 ?
input_dims : params().axis; + + // define output shape + Shape output_shape(input_shape.num_dims() + 1); + { + for (int32_t d = 0; d < axis; ++d) + output_shape.dim(d) = input_shape.dim(d); + + output_shape.dim(axis) = depth_value; + + for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d) + output_shape.dim(d) = input_shape.dim(d - 1); + } + + // reshape output + output()->resize(output_shape); +} + +void OneHot::execute() const +{ + auto const depth_value = getTensorData<int32_t>(depth())[0]; + auto const axis = params().axis; + + switch (output()->element_type()) + { + case loco::DataType::FLOAT32: + OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::U8: + OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + case loco::DataType::S16: + OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output()); + break; + default: + // TODO Support other data types + throw std::runtime_error("Not supported, yet!"); + break; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h new file mode 100644 index 000000000..572f857ae --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H +#define LUCI_INTERPRETER_KERNELS_ONEHOT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class OneHot : public KernelWithParams<OneHotParams> +{ +public: + OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value, + const Tensor *off_value, Tensor *output, const OneHotParams &params); + + const Tensor *indices() const { return _inputs[0]; } + const Tensor *depth() const { return _inputs[1]; } + const Tensor *on_value() const { return _inputs[2]; } + const Tensor *off_value() const { return _inputs[3]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp new file mode 100644 index 000000000..45b6968fa --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */ + +#include "kernels/OneHot.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T1, typename T2> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data, + std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data, + int32_t axis, std::initializer_list<T2> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr auto input_type = getElementType<T1>(); + constexpr auto output_type = getElementType<T2>(); + + Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get()); + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + OneHotParams params{}; + params.axis = axis; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); + EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template <typename T> class OneHotTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int16_t>; +TYPED_TEST_SUITE(OneHotTest, DataTypes); + +TYPED_TEST(OneHotTest, BasicPattern) +{ + // axis 0 + Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 
3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/0, + /*output_data=*/ + { + 1, 0, 0, // + 0, 0, 1, // + + 0, 0, 0, // + 0, 0, 0, // + + 0, 0, 0, // + 0, 0, 0, // + + 0, 1, 0, // + 0, 1, 0, // + }); + // axis 1 + Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/1, + /*output_data=*/ + { + 1, 0, 0, // + 0, 0, 0, // + 0, 0, 0, // + 0, 1, 0, // + + 0, 0, 1, // + 0, 0, 0, // + 0, 0, 0, // + 0, 1, 0, // + }); + // axis -1 + Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4}, + /*input_data=*/ + { + 0, 3, 5, // + 7, 3, 0, // + }, + /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0}, + /*axis=*/-1, + /*output_data=*/ + { + 1, 0, 0, 0, // + 0, 0, 0, 1, // + 0, 0, 0, 0, // + + 0, 0, 0, 0, // + 0, 0, 0, 1, // + 1, 0, 0, 0, // + }); +} + +TEST(OneHotTest, UnsupportedInputType_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + // input type should be integer + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get()); + + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + OneHotParams params = {-1}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(OneHotTest, OutputTypeMismatch_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, 
memory_manager.get()); + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get()); + + // type of on_value, off_value and output_tensor should be same + Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16); + + OneHotParams params = {-1}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(OneHotTest, InvalidAxis_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get()); + Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get()); + Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get()); + Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + // axis should be in [-1, input_shape.rank] + OneHotParams params = {-2}; + + OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor, + params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp new file mode 100644 index 000000000..5a6b05c3a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PRelu.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <tensorflow/lite/kernels/internal/reference/prelu.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output) + : Kernel({input, alpha}, {output}) +{ +} + +PRelu::~PRelu() +{ + // Destructor declared to delete vector of alpha quantized data properly +} + +void PRelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1); + LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1); + + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives + _alpha_multipliers.resize(1); + double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier, + &_alpha_multipliers[0].shift); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + else if (input()->element_type() == DataType::S16) + { + // Common check for correctness of quant params + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + 
for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel) + { + LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0); + } + // PRelu specific checks for CWQ + LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1); + LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) == + alpha()->shape().dim(alpha()->quantized_dimension())); + LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() == + input()->shape().dim(input()->shape().num_dims() - 1)); + + // all dimension of alpha except last one should be size 1 + for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim) + { + LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1); + } + + std::vector<double> real_multipliers = + getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale()); + + _alpha_multipliers = quantizeMultipliers(real_multipliers); + + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape())); +} + +void PRelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void PRelu::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto alpha_data = getTensorData<float>(alpha()); + const auto size = getTensorShape(input()).FlatSize(); + auto output_data = getTensorData<float>(output()); + + auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? 
input : input * alpha; }; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()), + getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()), + PReluFunc); + } + else + { + for (auto i = decltype(size){0}; i < size; ++i) + { + if (input_data[i] >= 0) + output_data[i] = input_data[i]; + else + output_data[i] = input_data[i] * alpha_data[i]; + } + } +} + +void PRelu::evalQuantized() const +{ + tflite::PreluParams op_params{}; + + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'. + op_params.output_offset = output()->zero_point(); + op_params.output_shift_1 = _output_shift_identity; + op_params.output_multiplier_1 = _output_multiplier_identity; + op_params.output_shift_2 = _alpha_multipliers[0].shift; + op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastPrelu4DSlow( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()), + getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Prelu<uint8_t>( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()), + getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val, + const ChannelQuantMultipliers &identity_mult, + const ChannelQuantMultipliers &alpha_mult) +{ + constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min(); + constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max(); + + const int32_t output_val = + input_val >= 0 + ? 
tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val), + identity_mult.multiplier, identity_mult.shift) + : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val), + alpha_mult.multiplier, alpha_mult.shift); + const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val)); + return clamped_output; +} + +void PRelu::evalQuantizedS16() const +{ + // Note that this kernel assumes alpha is CWQ + tflite::RuntimeShape input_shape = getTensorShape(input()); + const int16_t *input_data = input()->data<int16_t>(); + const int16_t *alpha_data = alpha()->data<int16_t>(); + int16_t *output_data = output()->data<int16_t>(); + + const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity}; + + const int last_dim = input()->shape().num_dims() - 1; + + int32_t outer_dims_size = 1; + for (int i = 0; i < last_dim; ++i) + outer_dims_size *= input_shape.Dims(i); + int32_t quant_dim_size = input_shape.Dims(last_dim); + + for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims) + for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel) + { + const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel]; + size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size); + offset += quant_channel; + + output_data[offset] = + evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h new file mode 100644 index 000000000..f7735d418 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H +#define LUCI_INTERPRETER_KERNELS_PRELU_H + +#include "core/Kernel.h" +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class ChannelQuantMultipliers; + +class PRelu : public Kernel +{ +public: + PRelu(const Tensor *input, const Tensor *alpha, Tensor *output); + + ~PRelu(); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *alpha() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + std::vector<ChannelQuantMultipliers> _alpha_multipliers; + // TODO merge this into one ChannelQuantMultiplier object + int32_t _output_multiplier_identity = 0; + int _output_shift_identity = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PRELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp new file mode 100644 index 000000000..6d97382de --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PRelu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, + std::initializer_list<T> alpha_data, std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(PReluTest, FloatSimple) +{ + Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 
0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*alpha_data=*/ + { + 0.0f, 0.5f, 0.1f, // Row 1 + 0.0f, 0.5f, 0.1f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -0.2f, // Row 2 + }); + + SUCCEED(); +} + +TEST(PReluTest, FloatBroadcast) +{ + Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3}, + /*output_shape=*/{1, 2, 2, 3}, + /*input_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -2.0f, -2.0f, -2.0f, // Row 2, Column 2 + }, + /*alpha_data=*/ + {0.0f, 1.0f, 2.0f}, + /*output_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + 0.0f, -1.0f, -2.0f, // Row 2, Column 1 + 0.0f, -2.0f, -4.0f, // Row 2, Column 2 + }); + + SUCCEED(); +} + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PReluTest, Uint8Simple) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f}; + + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + 
FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1})); + + SUCCEED(); +} + +TEST(PReluTest, Uint8Broadcast) +{ + std::vector<float> input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector<float> alpha_data{0.0f, 0.5f, -0.5f}; + std::vector<float> ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + std::vector<float> ref_quant_output_data{ + 128, 128, 128, // Row 1, Column 1 + 192, 192, 192, // Row 1, Column 2 + 128, 64, 192, // Row 2, Column 1 + 128, 112, 144 // Row 2, Column 2 + }; + float kQuantizedTolerance = 2 * (1. / 256); + const float kMin = -1; + const float kMax = 127.f / 128.f; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); + + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>( + {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_quant_output_data)); +} + 
+TEST(PReluTest, SInt16_LWQ_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + // Rewrite this test in case layer-wise quantization for sint16 is supported + std::vector<float> input_data(6); // data is not important + std::vector<float> alpha_data(6); + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_Simple) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; + + std::vector<float> alpha_scales{0.05f, 0.025f}; + std::vector<int32_t> zerop{0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data(6); // data is not important + std::vector<float> 
alpha_data(6); + + std::vector<float> alpha_scales{0.25f, 0.05f}; + std::vector<int32_t> zerop{0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data(6); // data is not important + std::vector<float> alpha_data(6); + + std::vector<float> alpha_scales{0.25f}; + std::vector<int32_t> zerop{0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, SInt16_CWQ_uneven_shape1) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; + + std::vector<float> alpha_scales{0.05f, 0.025f}; + std::vector<int32_t> zerop{0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); + + PRelu 
kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, SInt16_CWQ_uneven_shape2) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector<float> alpha_data{0.0f, 0.5f, -0.5f}; + std::vector<float> ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + + std::vector<float> alpha_scales{1.f, 0.05f, 0.1f}; + std::vector<int32_t> zerop{0, 0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PReluTest, Input_Output_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + 
Tensor output_tensor = makeOutputTensor(DataType::U8); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Input_Alpha_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Invalid_Input_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST(PReluTest, Input_Output_U8_CWQ_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> scales{1.f, 1.f}; + std::vector<int32_t> zerop{0, 0}; + std::vector<float> dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Input_Output_S16_CWQ_NEG) +{ + std::unique_ptr<IMemoryManager> 
memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> scales{1.f, 1.f}; + std::vector<int32_t> zerop{0, 0}; + std::vector<float> dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PReluTest, Mixing_U8_S16_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> dummy_data(4, 0.f); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + + PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp new file mode 100644 index 000000000..42aab330c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pack.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams ¶ms) + : KernelWithParams<PackParams>(std::move(inputs), {output}, params) +{ +} + +void Pack::configure() +{ + LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count)); + const Tensor *t0 = _inputs[0]; + const int dimension_size = t0->shape().num_dims() + 1; + int axis = params().axis; + if (axis < 0) + { + axis += dimension_size; + } + LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims()); + + if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 && + t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 && + t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64) + { + throw std::runtime_error("Unsupported type."); + } + + for (uint32_t i = 1; i < _inputs.size(); ++i) + { + const Tensor *tensor = _inputs[i]; + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); + for (int d = 0; d < t0->shape().num_dims(); ++d) + { + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); + } + } + + Shape output_shape(dimension_size); + int i = 0; + for (int index = 0; index < dimension_size; ++index) + { + if (index == axis) + { + 
output_shape.dim(index) = params().values_count; + } + else + { + output_shape.dim(index) = t0->shape().dim(i++); + } + } + + if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 || + t0->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point()); + LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale()); + // Guarantee input/output quantization params match as we do not support + // packing quantized tensors. + for (int i = 0; i < params().values_count; i++) + { + LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point()); + LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale()); + } + } + + output()->resize(output_shape); +} + +void Pack::execute() const +{ + switch (_inputs[0]->element_type()) + { + case DataType::FLOAT32: + evalGeneric<float>(); + break; + case DataType::U8: + evalGeneric<uint8_t>(); + break; + case DataType::S8: + evalGeneric<int8_t>(); + break; + case DataType::S16: + evalGeneric<int16_t>(); + break; + case DataType::S32: + evalGeneric<int32_t>(); + break; + case DataType::S64: + evalGeneric<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Pack::evalGeneric() const +{ + const Tensor *t0 = _inputs[0]; + const int dimension_size = t0->shape().num_dims() + 1; + int axis = params().axis; + if (axis < 0) + { + axis += dimension_size; + } + + VectorOfTensors<T, true> inputs(_inputs); + tflite::PackParams params{}; + params.axis = axis; + params.inputs_count = _inputs.size(); + tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()), + getTensorData<T>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h new file mode 100644 index 000000000..4a2fcfd80 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PACK_H +#define LUCI_INTERPRETER_KERNELS_PACK_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pack : public KernelWithParams<PackParams> +{ +public: + Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams ¶ms); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void evalGeneric() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PACK_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp new file mode 100644 index 000000000..d16320b78 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pack.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::vector<std::initializer_list<int32_t>> input_shapes, + std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas, + std::initializer_list<T> output_data, int32_t axis) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + std::vector<const Tensor *> inputs(input_datas.size()); + std::vector<Tensor> tmp_inputs; + for (int i = 0; i < input_datas.size(); i++) + { + if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value || + std::is_same<T, int64_t>::value) + { + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) + { + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + else + { + assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested"); + tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, "")); + 
memory_manager->allocate_memory(tmp_inputs[i]); + tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); + } + } + for (int i = 0; i < input_datas.size(); i++) + { + inputs[i] = &tmp_inputs[i]; + } + + Tensor output_tensor = makeOutputTensor(element_type); + if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128); + } + else if (std::is_same<T, int16_t>::value) + { + output_tensor = makeOutputTensor(element_type, 1.0f, 0); + } + + PackParams params{}; + params.axis = axis; + params.values_count = input_datas.size(); + Pack kernel(inputs, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class PackTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>; +TYPED_TEST_SUITE(PackTest, DataTypes); + +TYPED_TEST(PackTest, ThreeInputs) +{ + Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}}, + /*output_shape=*/{3, 2}, + /*input_datas=*/ + {{1, 4}, {2, 5}, {3, 6}}, + /*output_data=*/ + {1, 4, 2, 5, 3, 6}, /*axis=*/0); + + SUCCEED(); +} + +TYPED_TEST(PackTest, NegAxis) +{ + Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}}, + /*output_shape=*/{2, 3}, + /*input_datas=*/ + {{1, 4}, {2, 5}, {3, 6}}, + /*output_data=*/ + {1, 2, 3, 4, 5, 6}, /*axis=*/-1); + + SUCCEED(); +} + +TEST(Pack, MismatchingInputValuesCount_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input1_data{1, 4}; + std::vector<float> input2_data{2, 5}; + std::vector<float> input3_data{3, 6}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, 
memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + PackParams params{}; + { + params.axis = 0; + params.values_count = 2; + + Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); + } +} + +TEST(Pack, InvalidInputAxis_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input1_data{1, 4}; + std::vector<float> input2_data{2, 5}; + std::vector<float> input3_data{3, 6}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + PackParams params{}; + { + params.axis = 2; + params.values_count = 3; + + Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); + } +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp new file mode 100644 index 000000000..c07f6e310 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pad.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/pad.h> + +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output) + : Kernel({input, paddings}, {output}) +{ +} + +void Pad::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + // Paddings shape should be [N, 2]. 
+ assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +void Pad::execute() const +{ + const int num_dims = input()->shape().num_dims(); + + tflite::PadParams params{}; + params.left_padding_count = num_dims; + params.right_padding_count = num_dims; + + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = num_dims - 1; i >= 0; --i) + { + params.left_padding[i] = paddings_data[i * 2]; + params.right_padding[i] = paddings_data[i * 2 + 1]; + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + const float pad_value = 0.0f; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()), + &pad_value, getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max()); + const auto pad_value = static_cast<uint8_t>(output()->zero_point()); + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + &pad_value, getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + } + case DataType::S8: + { + assert(output()->zero_point() >= std::numeric_limits<int8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<int8_t>::max()); + const auto pad_value = static_cast<int8_t>(output()->zero_point()); + tflite::reference_ops::Pad(params, getTensorShape(input()), 
getTensorData<int8_t>(input()), + &pad_value, getTensorShape(output()), + getTensorData<int8_t>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h new file mode 100644 index 000000000..e05b47f29 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_PAD_H +#define LUCI_INTERPRETER_KERNELS_PAD_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pad : public Kernel +{ +public: + Pad(const Tensor *input, const Tensor *paddings, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PAD_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp new file mode 100644 index 000000000..dd3ce947c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Pad.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(Pad, Uint8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; + std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); +} + +TEST(Pad, Int8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f); + std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2}; + std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0}; + Tensor input_tensor = makeInputTensor<DataType::S8>( + {1, 3, 3, 1}, quant_param.first, 
quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0, + 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1})); +} + +TEST(Pad, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6}; + std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp new file mode 100644 index 000000000..197cdaa69 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PadV2.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/pad.h> + +#include <limits> + +namespace luci_interpreter +{ +namespace kernels +{ + +PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, + Tensor *output) + : Kernel({input, paddings, constant_values}, {output}) +{ +} + +void PadV2::configure() +{ + const Shape &input_shape = input()->shape(); + const int num_dims = input_shape.num_dims(); + + if (num_dims > 4) + throw std::runtime_error("Unsupported number of dimensions."); + + assert(output()->element_type() == input()->element_type()); + assert(paddings()->element_type() == DataType::S32); + assert(constant_values()->element_type() == output()->element_type()); + // Paddings shape should be [N, 2]. + assert(paddings()->shape().num_dims() == 2); + assert(paddings()->shape().dim(0) == num_dims); + assert(paddings()->shape().dim(1) == 2); + // Constant values elements number should be 1. 
+ assert(constant_values()->shape().num_elements() == 1); + + Shape output_shape(num_dims); + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = 0; i < num_dims; ++i) + { + const int32_t padding_before = paddings_data[i * 2]; + const int32_t padding_after = paddings_data[i * 2 + 1]; + assert(padding_before >= 0 && padding_after >= 0); + output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after; + } + + output()->resize(output_shape); +} + +void PadV2::execute() const +{ + const int num_dims = input()->shape().num_dims(); + + tflite::PadParams params{}; + params.left_padding_count = num_dims; + params.right_padding_count = num_dims; + + const auto *paddings_data = getTensorData<int32_t>(paddings()); + for (int i = num_dims - 1; i >= 0; --i) + { + params.left_padding[i] = paddings_data[i * 2]; + params.right_padding[i] = paddings_data[i * 2 + 1]; + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + { + const auto pad_value = getTensorData<float>(constant_values())[0]; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()), + &pad_value, getTensorShape(output()), + getTensorData<float>(output())); + break; + } + case DataType::U8: + { + assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min()); + assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max()); + const auto pad_value = getTensorData<uint8_t>(constant_values())[0]; + tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + &pad_value, getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h new file mode 100644 index 000000000..48a31f584 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H +#define LUCI_INTERPRETER_KERNELS_PAD_V2_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class PadV2 : public Kernel +{ +public: + PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *paddings() const { return _inputs[1]; } + const Tensor *constant_values() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp new file mode 100644 index 000000000..41efaff06 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/PadV2.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PadV2, Uint8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; + std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; + std::vector<float> constant_values_data{0.5}; + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = makeInputTensor<DataType::U8>( + {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data = { + 0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, // + 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5}; // + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); +} + +TEST(PadV2, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4, 5, 6}; + std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; + std::vector<float> constant_values_data{7}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = + makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp new file mode 100644 index 000000000..722c64024 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pow.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Pow::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Pow::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + eval<float>(); + break; + case DataType::S32: + eval<int32_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Pow::eval() const +{ + tflite::ArithmeticParams params{}; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()), 
+ getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h new file mode 100644 index 000000000..8ff865e40 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_POW_H +#define LUCI_INTERPRETER_KERNELS_POW_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pow : public Kernel +{ +public: + Pow(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void eval() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_POW_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp new file mode 100644 index 000000000..0e858115d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Pow.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class PowTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(PowTest, SimplePow) +{ + std::initializer_list<int32_t> base_shape = {1, 1, 3, 2}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST_F(PowTest, FloatBroadcastPow) +{ + std::initializer_list<int32_t> input1_shape = {1, 3}; + std::initializer_list<int32_t> input2_shape = {3, 1}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f}; + std::vector<float> input2_data{0.2f, 0.3f, 0.4f}; + std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f, + 0.96888f, 0.6178f, 1.3953f, 0.9587f}; + + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); + Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST_F(PowTest, IntPow) +{ + std::initializer_list<int32_t> base_shape = {1, 3}; + + std::vector<int32_t> input_data{2, 3, 4}; + std::vector<int32_t> test_outputs{4, 27, 256}; + + Tensor input1_tensor = + makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST_F(PowTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(PowTest, Input_Type_Mismatch_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(PowTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = 
makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp new file mode 100644 index 000000000..0c8544a65 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Quantize.h" +#include "kernels/Utils.h" +#include "PALQuantize.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output) +{ + int32_t multiplier; + int shift; + + const double effective_output_scale = input->scale() / output->scale(); + quantizeMultiplier(effective_output_scale, &multiplier, &shift); + + const auto input_shape = getTensorShape(input); + const auto output_shape = getTensorShape(output); + const auto size = tflite::MatchingFlatSize(input_shape, output_shape); + + const auto input_data = getTensorData<input_dtype>(input); + + switch (output->element_type()) + { + case loco::DataType::S8: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData<int8_t>(output)); + break; + case loco::DataType::U8: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData<uint8_t>(output)); + break; + case loco::DataType::S16: + luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(), + output->zero_point(), getTensorData<int16_t>(output)); + break; + default: + throw std::runtime_error("Unsupported quantized type, yet!"); + } +} + +} // namespace + +Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Quantize::configure() +{ + + if (input()->element_type() == loco::DataType::S16) + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); + + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + { + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 || + output()->element_type() == loco::DataType::S8 || + output()->element_type() == loco::DataType::S16); + break; + } + case loco::DataType::S16: + case loco::DataType::S8: + case loco::DataType::U8: + { + 
LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 || + output()->element_type() == loco::DataType::U8 || + output()->element_type() == loco::DataType::S16); + if (output()->element_type() == loco::DataType::S16) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == 0); + } + break; + } + default: + throw std::runtime_error("Unsupported type"); + } + + output()->resize(input()->shape()); +} + +void Quantize::execute() const +{ + switch (input()->element_type()) + { + case loco::DataType::FLOAT32: + { + tflite::QuantizationParams op_params; + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + const auto input_data = getTensorData<float>(input()); + + switch (output()->element_type()) + { + case loco::DataType::S8: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), getTensorData<int8_t>(output())); + break; + } + case loco::DataType::U8: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + } + case loco::DataType::S16: + { + luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data, + getTensorShape(output()), + getTensorData<int16_t>(output())); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } + break; + } + case loco::DataType::S16: + { + call_requantize<int16_t>(input(), output()); + break; + } + case loco::DataType::S8: + { + call_requantize<int8_t>(input(), output()); + break; + } + case loco::DataType::U8: + { + call_requantize<uint8_t>(input(), output()); + break; + } + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h new file mode 100644 index 000000000..006c5366f --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H +#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Quantize : public Kernel +{ +public: + Quantize(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp new file mode 100644 index 000000000..22e67fe3f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Quantize.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class QuantizeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(QuantizeTest, FloatUint8) +{ + std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, FloatInt8) +{ + std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; + + std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, 
/*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, FloatInt16) +{ + std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64}; + + std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200, + 200, 400, 600, 12700, 12800}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5})); +} + +TEST_F(QuantizeTest, Int16Int16) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; + + Tensor input_tensor = makeInputTensor<DataType::S16>( + {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int16_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Int8Int8) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + 
std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; + + Tensor input_tensor = makeInputTensor<DataType::S8>( + {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Uint8Uint8) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147}; + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, Int16Int8) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; + + Tensor input_tensor = makeInputTensor<DataType::S16>( + {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData<int8_t>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5})); +} + +TEST_F(QuantizeTest, InvalidInputType_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + 
makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Quantize kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp new file mode 100644 index 000000000..747ec6cc8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/Utils.h" + +#include "PALRelu.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData<float>(output()); + auto output_shape = getTensorShape(output()); + + luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data); +} + +void Relu::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); + params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max()); + + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), 
getTensorData<uint8_t>(output())); +} + +void Relu::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + auto *output_data = getTensorData<int16_t>(output()); + + constexpr int32_t output_min = 0; + constexpr int32_t output_max = std::numeric_limits<int16_t>::max(); + + const int32_t num_elements = input()->shape().num_elements(); + + for (int32_t i = 0; i < num_elements; ++i) + { + const int32_t input_val = input_data[i]; + int32_t output_val = + tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift); + output_val = std::max(output_val, output_min); + output_val = std::min(output_val, output_max); + output_data[i] = static_cast<int16_t>(output_val); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h new file mode 100644 index 000000000..b813f0cdf --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU_H +#define LUCI_INTERPRETER_KERNELS_RELU_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu : public Kernel +{ +public: + Relu(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp new file mode 100644 index 000000000..bd32e3cc9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReluTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ReluTest, FloatSimple) +{ + std::vector<float> input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector<float> ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(ReluTest, Uint8Quantized) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float f_min = (-128.0 / 128.0) * 8; + const float f_max = (127.0 / 128.0) * 8; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST_F(ReluTest, Uint8Requantized) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float in_min = (-128.0 / 128.0) * 8; + const float in_max = (127.0 / 128.0) * 8; + const float out_min = (0.0 / 256.0) * 8; + const float out_max = (255.0 / 256.0) * 8; + + std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); + + std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST_F(ReluTest, SInt16) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + std::vector<float> ref_output_data{ + 0, 0, 2, 4, // + 3, 0, 7, 1, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(ReluTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu kernel(&input_tensor, &output_tensor); + 
EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(ReluTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp new file mode 100644 index 000000000..07205ed3a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu6.h" +#include "kernels/Utils.h" + +#include "PALRelu6.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu6::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + if (input()->element_type() == DataType::U8) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu6::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu6::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData<float>(output()); + auto output_shape = getTensorShape(output()); + + luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data); +} + +void Relu6::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); + params.quantized_activation_max = + std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()), + params.output_offset + static_cast<int32>(roundf(6.f / output()->scale()))); + + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h new file mode 100644 index 000000000..f5030b588 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H +#define LUCI_INTERPRETER_KERNELS_RELU6_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu6 : public Kernel +{ +public: + Relu6(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU6_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp new file mode 100644 index 000000000..af7b3f3db --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Relu6.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class Relu6Test : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(Relu6Test, FloatSimple) +{ + std::vector<float> input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 7.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector<float> ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 6.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST_F(Relu6Test, Uint8Quantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float f_min = (-128.0 / 128.0) * 10; + const float f_max = (127.0 / 128.0) * 10; + const float tolerance = (f_max - f_min) / 255.0; + + std::vector<float> input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST_F(Relu6Test, Uint8Requantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+ const float in_min = (-128.0 / 128.0) * 10; + const float in_max = (127.0 / 128.0) * 10; + const float out_min = (0.0 / 256.0) * 0; + const float out_max = (255.0 / 256.0) * 6; + const float tolerance = (in_max - in_min) / 255.0; + + std::vector<float> input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); + + std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST_F(Relu6Test, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu6 kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(Relu6Test, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp new file mode 100644 index 000000000..61d3300b2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Reshape.h" + +#include <cassert> +#include <cstring> + +namespace luci_interpreter +{ + +namespace kernels +{ + +static Shape extractShapeFromTensor(const Tensor *tensor) +{ + assert(tensor->element_type() == DataType::S32); + Shape shape(tensor->shape().num_elements()); + const auto *shape_data = tensor->data<int32_t>(); + for (int i = 0; i < tensor->shape().num_elements(); ++i) + { + shape.dim(i) = shape_data[i]; + } + return shape; +} + +static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape) +{ + const int32_t num_input_elements = input_shape.num_elements(); + int32_t num_output_elements = 1; + int unknown_dim_index = -1; + for (int i = 0; i < output_shape->num_dims(); ++i) + { + const int32_t value = output_shape->dim(i); + if (value == -1) + { + assert(unknown_dim_index == -1); + unknown_dim_index = i; + } + else + { + num_output_elements *= value; + } + } + if (unknown_dim_index != -1) + { + output_shape->dim(unknown_dim_index) = 
num_input_elements / num_output_elements; + num_output_elements *= output_shape->dim(unknown_dim_index); + } + assert(num_output_elements == num_input_elements); +} + +Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output) + : Kernel({input, shape}, {output}) +{ +} + +void Reshape::configure() +{ + Shape output_shape = extractShapeFromTensor(shape()); + resolveUnknownDimension(input()->shape(), &output_shape); + output()->resize(output_shape); +} + +void Reshape::execute() const +{ + const auto *input_data = input()->data<void>(); + auto *output_data = output()->data<void>(); + + const size_t element_size = getDataTypeSize(input()->element_type()); + const int32_t num_elements = input()->shape().num_elements(); + std::memcpy(output_data, input_data, num_elements * element_size); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h new file mode 100644 index 000000000..99b947f77 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H +#define LUCI_INTERPRETER_KERNELS_RESHAPE_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Reshape : public Kernel +{ +public: + Reshape(const Tensor *input, const Tensor *shape, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *shape() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp new file mode 100644 index 000000000..c2ff3ea1b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Reshape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ReshapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +// TODO Test types other than FLOAT32. + +TEST_F(ReshapeTest, Regular) +{ + Shape input_shape{1, 2, 2, 3}; + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape shape_shape{2}; + std::vector<int32_t> shape_data{3, 4}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); +} + +TEST_F(ReshapeTest, UnknownDimension) +{ + Shape input_shape{2, 1, 2, 3}; + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape shape_shape{3}; + std::vector<int32_t> shape_data{2, -1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); +} + +} // namespace +} // namespace kernels +} // namespace 
luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp new file mode 100644 index 000000000..e2ddd6a7b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ResizeBilinear.h" + +#include "kernels/Utils.h" + +#include "PALResizeBilinear.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output, + const ResizeBilinearParams &params) + : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params) +{ +} + +void ResizeBilinear::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32); + if (params().half_pixel_centers && params().align_corners) + throw std::runtime_error("If half_pixel_centers is True, align_corners must be False."); + LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2); + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = getTensorData<int32_t>(size())[0]; + output_shape.dim(2) = getTensorData<int32_t>(size())[1]; + output_shape.dim(3) = input()->shape().dim(3); + output()->resize(output_shape); +} + +void ResizeBilinear::execute() const +{ + tflite::ResizeBilinearParams op_params{}; + op_params.align_corners = params().align_corners; + op_params.half_pixel_centers = params().half_pixel_centers; + switch (output()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::ResizeBilinear( + op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output())); + break; + case DataType::U8: + luci_interpreter_pal::ResizeBilinear( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git
a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h new file mode 100644 index 000000000..b7bdc2ab7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ResizeBilinear : public KernelWithParams<ResizeBilinearParams> +{ +public: + ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output, + const ResizeBilinearParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp new file mode 100644 index 000000000..933a1128c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp @@ -0,0
+1,255 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ResizeBilinear.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, + bool align_corners, bool half_pixel_centers) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, + std::initializer_list<float> output_data, bool align_corners, + bool half_pixel_centers) +{ + // On TFlite example use Uint8 value it self, so this means quant param scale 1.0f and zero + // point 0. + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0); + + ResizeBilinearParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class ResizeBilinearTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes); + +TYPED_TEST(ResizeBilinearTest, SimpleTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 5, 6, // + 7, 9, 10, // + 9, 11, 12, // + 4, 8, 10, // + 8, 12, 14, // + 10, 14, 16, // + }, + false, false); + SUCCEED(); +} + +TEST(ResizeBilinearTest, 
HalfPixelCenterFloatTest) +{ + Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 1, 2, // + 3, 4, // + 1, 2, // + 3, 4 // + }, + {3, 3}, + { + 1, 1.5, 2, // + 2, 2.5, 3, // + 3, 3.5, 4, // + 1, 1.5, 2, // + 2, 2.5, 3, // + 3, 3.5, 4, // + }, + false, true); + SUCCEED(); +} + +TEST(ResizeBilinearTest, HalfPixelCenterUint8Test) +{ + Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 12, 16 // + }, + {3, 3}, + { + 2, 4, 6, // + 6, 7, 9, // + 9, 10, 12, // + 4, 7, 10, // + 8, 10, 13, // + 12, 14, 16, // + }, + false, true); + SUCCEED(); +} + +TEST(ResizeBilinearTest, InputShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, SizeShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, SizeDimInvalid_NEG) +{ + 
std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, InvalidParams_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = true; + params.half_pixel_centers = true; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp new file mode 100644 index 000000000..306cefbc2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ResizeNearestNeighbor.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> +#include "PALResizeNearestNeighbor.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size, + Tensor *output, + const ResizeNearestNeighborParams &params) + : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params) +{ +} + +void ResizeNearestNeighbor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2); + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = getTensorData<int32_t>(size())[0]; + output_shape.dim(2) = getTensorData<int32_t>(size())[1]; + output_shape.dim(3) = input()->shape().dim(3); + output()->resize(output_shape); +} + +void ResizeNearestNeighbor::execute() const +{ + tflite::ResizeNearestNeighborParams op_params{}; + op_params.align_corners = params().align_corners; + op_params.half_pixel_centers = params().half_pixel_centers; + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::ResizeNearestNeighbor( + op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()),
getTensorData<int32_t>(output())); + break; + case DataType::U8: + luci_interpreter_pal::ResizeNearestNeighbor( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h new file mode 100644 index 000000000..137d031cf --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams> +{ +public: + ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output, + const ResizeNearestNeighborParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp new file mode 100644 index 000000000..7ade02a6f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ResizeNearestNeighbor.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, + bool align_corners, bool half_pixel_centers) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, + std::initializer_list<float> output_data, bool align_corners, + bool half_pixel_centers) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> quant_param = + quantizationParams<uint8_t>(std::min(input_data) < 0 ? 
std::min(input_data) : 0.f, + std::max(input_data) > 0 ? std::max(input_data) : 0.f); + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.first); + + ResizeNearestNeighborParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class ResizeNearestNeighborTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes); + +TYPED_TEST(ResizeNearestNeighborTest, SimpleTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12, // + 4, 4, 10, // + 4, 4, 10, // + 10, 10, 16, // + }, + false, false); +} + +TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 6, 6, // + 9, 12, 12, // + 9, 12, 12, // + 4, 10, 10, // + 10, 16, 16, // + 10, 16, 16, // + }, + true, false); +} + +TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 6, 6, // + 9, 12, 12, // + 9, 12, 12, // + 4, 10, 10, // + 10, 16, 16, 
// + 10, 16, 16, // + }, + false, true); +} + +TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + 
params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp new file mode 100644 index 000000000..1b6a5cc3b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ReverseV2.h" +#include "kernels/Utils.h" +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +namespace luci_interpreter +{ + +namespace kernels +{ + +ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output) + : Kernel({input, axes}, {output}) +{ +} + +void ReverseV2::configure() +{ + assert(axes()->shape().num_dims() == 1); + assert(input()->shape().num_dims() >= axes()->shape().num_elements()); + if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 && + input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 && + input()->element_type() != DataType::S64) + { + throw std::runtime_error("Unsupported input type."); + } + if (axes()->element_type() != DataType::S32) + { + throw std::runtime_error("Unsupported axes type."); + } + if (axes()->shape().num_elements() > 1) + { + throw std::runtime_error("Current implementation does not support more than 1 axis."); + } + int axis_value = getTensorData<int32_t>(axes())[0]; + if (axis_value < 0 || axis_value >= input()->shape().num_dims()) + { + throw std::runtime_error("Invalid axes value"); + } + assert(input()->element_type() == output()->element_type()); + + output()->resize(input()->shape()); +} + +void ReverseV2::execute() const +{ + int axis_value = getTensorData<int32_t>(axes())[0]; + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + tflite::reference_ops::Reverse<uint8_t>( + axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported output type"); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git 
a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h new file mode 100644 index 000000000..51211c703 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H +#define LUCI_INTERPRETER_KERNELS_REVERSE_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ReverseV2 : public Kernel +{ +public: + ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *axes() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp new file mode 100644 index 000000000..c0025faca --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ReverseV2.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> class ReverseV2Test : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(ReverseV2Test, DataTypes); + +TYPED_TEST(ReverseV2Test, MultiDimensions) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + // TypeParam + std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; + Shape input_shape{4, 3, 2}; + std::vector<int32_t> axis_data{1}; + Shape axis_shape{1}; + + std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8, + 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}; + std::vector<int32_t> output_shape{4, 3, 2}; + + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); + + ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + 
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), + ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp new file mode 100644 index 000000000..6dd92dc98 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Rsqrt.h" +#include "kernels/Utils.h" + +#include <stdexcept> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Rsqrt::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Rsqrt::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Rsqrt::evalFloat() const +{ + auto in = getTensorData<float>(input()); + auto out = getTensorData<float>(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = 1.f / std::sqrt(*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h new file mode 100644 index 000000000..adc5bcfa2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H +#define LUCI_INTERPRETER_KERNELS_RSQRT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Rsqrt : public Kernel +{ +public: + Rsqrt(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp new file mode 100644 index 000000000..3c6494232 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Rsqrt.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Rsqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(RsqrtTest, SimpleRsqrt) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 5, 4, 8, 2, // + 6, 7.5, 9, 0.3, // + }, + /*output_data=*/ + { + 0.44721360, 0.5, 0.35355339, 0.70710678, // + 0.40824829, 0.36514837, 0.33333333, 1.8257419, // + }); +} + +TEST(RsqrtTest, Input_Output_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Rsqrt kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(RsqrtTest, Invalid_Input_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Rsqrt kernel(&input_tensor, 
&output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp new file mode 100644 index 000000000..40d79aaa3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/SVDF.h" +#include "kernels/Utils.h" +#include "PALSVDF.h" + +#include <tensorflow/lite/kernels/internal/quantization_util.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ +TfLiteFusedActivation get_tflite_activation(Activation activation) +{ + switch (activation) + { + case luci::FusedActFunc::RELU: + return kTfLiteActRelu; + case luci::FusedActFunc::RELU6: + return kTfLiteActRelu6; + case luci::FusedActFunc::RELU_N1_TO_1: + return kTfLiteActReluN1To1; + case luci::FusedActFunc::TANH: + return kTfLiteActTanh; + case luci::FusedActFunc::SIGN_BIT: + return kTfLiteActSignBit; + case luci::FusedActFunc::NONE: + return kTfLiteActNone; + default: + throw std::runtime_error("Unsupported activation type"); + } +} +} // namespace + +SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time, + const Tensor *bias, const Tensor *input_activation_state, Tensor *output, + Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2, + Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6, + const SVDFParams ¶ms) + : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state}, + {output, scratchpad_activation_state, scratchpad_1, scratchpad_2, + scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6}, + params) +{ + // Do nothing +} + +void SVDF::configure() +{ + const Shape &input_shape = input()->shape(); + const Shape &weight_features_shape = weight_feature()->shape(); + const Shape &weight_time_shape = weight_time()->shape(); + + // Validate Input Tensor: + LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 || + input()->element_type() == loco::DataType::S8); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2); + + // Validate inputs and output types + if (input()->element_type() == loco::DataType::S8) + { + LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == 
loco::DataType::S8); + LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 || + weight_time()->element_type() == loco::DataType::S8); + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32); + + LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 || + input_activation_state()->element_type() == loco::DataType::S8); + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8); + + // Note: now tflite support only ReLU activation for integer SVDF + LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU); + } + else if (weight_feature()->element_type() == loco::DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32); + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); + } + else if ((weight_feature()->element_type() == loco::DataType::U8 || + weight_feature()->element_type() == loco::DataType::S8) && + input()->element_type() == loco::DataType::FLOAT32) + { + // TODO:: support hybrid SVDF op + throw std::runtime_error("Hybrid type is not currently supported"); + } + else + { + throw std::runtime_error("Unsupported type."); + } + + // Check all the parameters of tensor match within themselves and match the + // input configuration. 
+ const int rank = params().svdf_rank; + const int batch_size = input_shape.dim(0); + const int num_filters = weight_features_shape.dim(0); + LUCI_INTERPRETER_CHECK(rank != 0); + LUCI_INTERPRETER_CHECK(num_filters % rank == 0); + + const int num_units = num_filters / rank; + const int memory_size = weight_time_shape.dim(1); + + // Validate Weight_Feature Input Tensor: + LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1)); + + // Validate Weight_Time Input Tensor: + LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters); + + // Validate Bias + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units); + + // Validate Input Activation State + LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2); + LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size); + LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters); + + // Resize scratchpad_state to input_activation_state + auto scratchpad_activation_state = getOutputTensors()[1]; + scratchpad_activation_state->resize({batch_size, memory_size * num_filters}); + + // Resize output tensor + output()->resize({batch_size, num_units}); + + luci_interpreter_pal::SetupScratchpadTensor( + input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2], + getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6], + getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units); +} + +void SVDF::execute() const +{ + switch (weight_feature()->element_type()) + { + case loco::DataType::FLOAT32: + evalFloat(); + break; + case loco::DataType::S8: + { + if (input()->element_type() == loco::DataType::S8) + evalInteger(); + else + // TODO:: support hybrid SVDF op + throw std::runtime_error("Hybrid type is not 
currently supported"); + break; + } + default: + throw std::runtime_error("Unsupported type"); + } +} + +void SVDF::evalInteger() const +{ + const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() / + input_activation_state()->scale()); + const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() * + weight_time()->scale() / output()->scale()); + + int32_t effective_scale_1_a; + int effective_scale_1_b; + int32_t effective_scale_2_a; + int effective_scale_2_b; + + tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b); + tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b); + + TfLiteSVDFParams params_svdf{}; + params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs; + params_svdf.rank = params().svdf_rank; + params_svdf.activation = get_tflite_activation(params().activation); + + auto scratchpad_activation_state = getOutputTensors()[1]; + // Note: it is expected that activation_state input variable tensor reset to zero, + // also expected that this variable tensor doesn't have buffer + auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state); + std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0); + + auto scratchpad = getOutputTensors()[2]; + auto output_temp = getOutputTensors()[3]; + + int32_t input_zp = input()->zero_point(); + int32_t output_zp = output()->zero_point(); + luci_interpreter_pal::IntegerSVDF( + params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()), + getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()), + getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()), + getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad), + getTensorData<int32_t>(output_temp), effective_scale_1_a, 
effective_scale_1_b, + effective_scale_2_a, effective_scale_2_b, input_zp, output_zp); +} + +void SVDF::evalFloat() const +{ + TfLiteSVDFParams params_svdf{}; + params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs; + params_svdf.rank = params().svdf_rank; + params_svdf.activation = get_tflite_activation(params().activation); + + auto scratchpad_activation_state = getOutputTensors()[1]; + // Note: it is expected that activation_state input variable tensor reset to zero, + // also expected that this variable tensor doesn't have buffer + auto scratchpad_data = getTensorData<float>(scratchpad_activation_state); + std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0); + + auto scratchpad_1 = getOutputTensors()[2]; + + luci_interpreter_pal::FloatSVDF( + params_svdf, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(weight_feature()), getTensorData<float>(weight_feature()), + getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()), + getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data, + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h new file mode 100644 index 000000000..335a6cd8f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H +#define LUCI_INTERPRETER_KERNELS_SVDF_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SVDF : public KernelWithParams<SVDFParams> +{ +public: + SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time, + const Tensor *bias, const Tensor *input_activation_state, Tensor *output, + Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2, + Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6, + const SVDFParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *weight_feature() const { return _inputs[1]; } + const Tensor *weight_time() const { return _inputs[2]; } + const Tensor *bias() const { return _inputs[3]; } + const Tensor *input_activation_state() const { return _inputs[4]; } + + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalInteger() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SVDF_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp new file mode 100644 index 000000000..82bd9b009 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SVDF.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class SVDFTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(SVDFTest, FullIntegerTest) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape bias_shape{units}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083, + 0.17660543, 0.52949083, -0.77931279}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 
0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1); + std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5); + std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1); + std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512); + std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16); + + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5); + + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::S8>( + weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second, + weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = makeInputTensor<DataType::S16>( + weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second, + weight_time_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor( + DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + 
Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::S32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::S32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::RELU; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad_activation_state); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + _memory_manager->allocate_memory(scratchpad_4); + _memory_manager->allocate_memory(scratchpad_5); + _memory_manager->allocate_memory(scratchpad_6); + kernel.execute(); + + std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0}; + + std::vector<int32_t> ref_output_shape{batches, units}; + EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SVDFTest, FloatTest) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape 
activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465, + 0.35867718, 0.36897406, 0.73463392}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; 
+ params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(scratchpad_activation_state); + _memory_manager->allocate_memory(scratchpad_1); + _memory_manager->allocate_memory(scratchpad_2); + _memory_manager->allocate_memory(scratchpad_3); + _memory_manager->allocate_memory(scratchpad_4); + _memory_manager->allocate_memory(scratchpad_5); + _memory_manager->allocate_memory(scratchpad_6); + kernel.execute(); + + std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883, + -0.03004015, 0.09565311, 0.1587342, 0.00784263}; + + std::vector<float> ref_output_shape{batches, units}; + const float tolerance = 1e-5; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(SVDFTest, Unsupported_Type_Configure_NEG) +{ + const int32_t batches = 2; + const int32_t input_size = 3; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, input_size}; + Shape weight_feature_shape{num_filters, input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 
0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SVDFTest, Invalid_Input_Shape_NEG) +{ + 
const int32_t batches = 2; + const int32_t right_input_size = 3; + const int32_t wrong_input_size = 4; + const int32_t units = 4; + const int32_t memory_size = 10; + const int32_t rank = 1; + const int32_t num_filters = units * rank; + + Shape input_shape{batches, wrong_input_size}; + Shape weight_feature_shape{num_filters, right_input_size}; + Shape weight_time_shape{num_filters, memory_size}; + Shape activation_state_shape{batches, memory_size * num_filters}; + + std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1}; + + std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347, + 0.22197971, 0.12416199, 0.27901134, 0.27557442, + 0.3905206, -0.36137494, -0.06634006, -0.10640851}; + + std::vector<float> weight_time_data{ + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, + + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, + + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, + + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>( + weight_feature_shape, weight_feature_data, _memory_manager.get()); + Tensor weight_time_tensor = + makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get()); + Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32); + activation_state_tensor.resize(activation_state_shape); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, ""); + Tensor 
scratchpad_2(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, ""); + Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, ""); + + SVDFParams params{}; + params.activation = Activation::NONE; + params.asymmetric_quantize_inputs = false; + params.svdf_rank = rank; + + SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr, + &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1, + &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp new file mode 100644 index 000000000..0429fe1e5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Shape.h" +#include "kernels/Utils.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params) + : KernelWithParams<ShapeParams>({input}, {output}, params) +{ +} + +void ShapeKernel::configure() +{ + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or + output()->element_type() == DataType::S64); + const auto input_shape = input()->shape(); + + Shape output_shape(1); + output_shape.dim(0) = input_shape.num_dims(); + + output()->resize(output_shape); +} + +void ShapeKernel::execute() const +{ + switch (params().out_type) + { + case DataType::S32: + evalInt<int32_t>(); + break; + case DataType::S64: + evalInt<int64_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void ShapeKernel::evalInt() const +{ + const auto input_shape = input()->shape(); + + auto output_data = getTensorData<T>(output()); + + for (int i = 0; i < input_shape.num_dims(); ++i) + { + output_data[i] = input_shape.dim(i); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h new file mode 100644 index 000000000..cfaadec91 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H +#define LUCI_INTERPRETER_KERNELS_SHAPE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ShapeKernel : public KernelWithParams<ShapeParams> +{ +public: + ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void evalInt() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp new file mode 100644 index 000000000..4763e016c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "kernels/Shape.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class ShapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager) +{ + Shape input_shape{1, 3, 1, 3, 5}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(dataType); + + ShapeParams params{}; + params.out_type = dataType; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<T> ref_output_data{1, 3, 1, 3, 5}; + EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data); + + std::vector<int32_t> ref_output_shape{5}; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(ShapeTest, OutTypeInt) +{ + + // Run for int32_t output + runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get()); + // Run for int64_t output + runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get()); + + SUCCEED(); +} + +TEST_F(ShapeTest, Invalid_Output_Type_NEG) +{ + Shape input_shape{1, 3}; + + Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, ""); + Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32); + + ShapeParams params{}; + params.out_type = loco::DataType::FLOAT32; + + ShapeKernel kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp 
b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp new file mode 100644 index 000000000..2fe2c5471 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Slice.h" +#include "Utils.h" +#include "PALSlice.h" + +#include <cassert> +#include <cstring> + +namespace luci_interpreter +{ + +namespace kernels +{ +const int max_dim = 4; + +Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output) + : Kernel({input, begin, size}, {output}) +{ +} + +template <typename T> +Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size) +{ + Shape output_shape = Shape(input->shape().num_dims()); + for (int idx = 0; idx < input->shape().num_dims(); idx++) + { + T size_value = getTensorData<T>(size)[idx]; + if (size_value < 0) + { + if (size_value != -1) + { + throw std::runtime_error("Invalid size."); + } + size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx]; + } + else + { + if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value) + { + throw std::runtime_error("Invalid begin and size."); + } + } + output_shape.dim(idx) = static_cast<int>(size_value); + } + return output_shape; +} + +template <typename T> +void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor 
*size, + std::vector<int> *begins, std::vector<int> *sizes) +{ + for (int idx = dimensions - 1; idx >= 0; --idx) + { + begins->push_back(getTensorData<T>(begin)[idx]); + sizes->push_back(getTensorData<T>(size)[idx]); + } +} + +void Slice::configure() +{ + assert(input()->element_type() == output()->element_type()); + assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64); + assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64); + assert(begin()->shape().num_dims() == 1); + assert(size()->shape().num_dims() == 1); + assert(input()->shape().num_dims() <= max_dim); + + if (begin()->element_type() == DataType::S32) + { + output()->resize(calculateOutputShape<int32_t>(input(), begin(), size())); + } + else if (begin()->element_type() == DataType::S64) + { + output()->resize(calculateOutputShape<int64_t>(input(), begin(), size())); + } + else + { + throw std::runtime_error("Unsupported type."); + } +} + +void Slice::execute() const +{ + std::vector<int> begins; + begins.reserve(max_dim); + std::vector<int> sizes; + sizes.reserve(max_dim); + if (begin()->element_type() == DataType::S32) + { + getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes); + } + else if (begin()->element_type() == DataType::S64) + { + getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes); + } + else + { + throw std::runtime_error("Unsupported begin type."); + } + for (int i = input()->shape().num_dims(); i < max_dim; ++i) + { + begins.push_back(0); + sizes.push_back(1); + } + + assert(begins.size() == 4); + assert(sizes.size() == 4); + tflite::SliceParams op_params{}; + op_params.begin_count = 4; + op_params.size_count = 4; + for (int i = 0; i < 4; i++) + { + op_params.begin[i] = begins[3 - i]; + op_params.size[i] = sizes[3 - i]; + } + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::Slice(op_params, 
getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); + break; + case DataType::U8: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + case DataType::S8: + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<int8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported input type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h new file mode 100644 index 000000000..23c359608 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H +#define LUCI_INTERPRETER_KERNELS_SLICE_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Slice : public Kernel +{ +public: + Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *begin() const { return _inputs[1]; } + const Tensor *size() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SLICE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp new file mode 100644 index 000000000..517982990 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Slice.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> class SliceTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; +TYPED_TEST_SUITE(SliceTest, DataTypes); + +TYPED_TEST(SliceTest, SimpleTest) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; + Shape input_shape{3, 2, 3, 1}; + std::vector<int32_t> begin_data{1, 0, 0, 0}; + Shape begin_shape{4}; + std::vector<int32_t> size_data{2, 1, -1, 1}; + Shape size_shape{4}; + std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5}; + std::vector<int32_t> output_shape{2, 1, 3, 1}; + + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); + + Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); + + Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), + ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp new file mode 100644 index 000000000..c230aaa70 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Softmax.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/softmax.h> +#include "PALSoftmax.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams ¶ms) + : KernelWithParams<SoftmaxParams>({input}, {output}, params) +{ +} + +void Softmax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1); + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 || + output()->zero_point() == std::numeric_limits<int8_t>::min()); + tflite::SoftmaxParams op_params{}; + op_params.table = _table; + luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta); + } + output()->resize(input()->shape()); +} + +void Softmax::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S8: + evalQuantized<int8_t>(); + break; 
+ case DataType::U8: + evalQuantized<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Softmax::evalFloat() const +{ + tflite::SoftmaxParams op_params{}; + op_params.beta = params().beta; + + tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +template <typename T> void Softmax::evalQuantized() const +{ + tflite::SoftmaxParams op_params{}; + op_params.table = const_cast<float *>(_table); + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta); + luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()), + getTensorShape(output()), getTensorData<T>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h new file mode 100644 index 000000000..1f281df1c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H +#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Softmax : public KernelWithParams<SoftmaxParams> +{ +public: + Softmax(const Tensor *input, Tensor *output, const SoftmaxParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalQuantized() const; + + float _table[256]; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp new file mode 100644 index 000000000..08e70672d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Softmax.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> constexpr loco::DataType toLocoDataType(); + +template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; } + +template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; } + +template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; } + +template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(toLocoDataType<T>()); + + SoftmaxParams params{}; + params.beta = 0.1; + + Softmax kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> input_quant_param = + quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f), + 
std::max<float>(std::max<float>(input_data), 0.f)); + std::pair<float, int32_t> output_quant_param = + quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f), + std::max<float>(std::max<float>(output_data), 0.f)); + Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first, + input_quant_param.second, input_data, + memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second); + + SoftmaxParams params{}; + params.beta = 0.1; + + Softmax kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class SoftmaxTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; +TYPED_TEST_SUITE(SoftmaxTest, DataTypes); + +TYPED_TEST(SoftmaxTest, Simple) +{ + Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3}, + { + 5, -9, 8, // + -7, 2, -4, // + 1, -2, 9, // + 3, -6, -1, // + }, + { + 0.38514, 0.09497, 0.51989, // + 0.20792, 0.51141, 0.28067, // + 0.25212, 0.18678, 0.56110, // + 0.48149, 0.19576, 0.32275, // + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp new file mode 100644 index 000000000..630cd38c4 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SpaceToBatchND.h" +#include "kernels/Utils.h" + +#include "PALSpaceToBatchND.h" + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +const int kInputMinDimensionNum = 3; +const int kInputMaxDimensionNum = 4; + +} // namespace + +SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape, + const Tensor *paddings, Tensor *output) + : Kernel({input, block_shape, paddings}, {output}) +{ +} + +void SpaceToBatchND::configure() +{ + const auto *block_shape_data = block_shape()->data<int32_t>(); + const auto *paddings_data = paddings()->data<int32_t>(); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + int spatial_dims_num = input()->shape().num_dims() - 2; + + LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num); + + LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2); + LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num); + LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2); + + Shape output_shape = Shape(input()->shape().num_dims()); + int output_batch_size = input()->shape().dim(0); + for (int i = 0; i < 
spatial_dims_num; ++i) + { + int final_dim_size = + (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]); + LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0); + output_shape.dim(i + 1) = final_dim_size / block_shape_data[i]; + output_batch_size = output_batch_size * block_shape_data[i]; + } + output_shape.dim(0) = output_batch_size; + output_shape.dim(input()->shape().num_dims() - 1) = + input()->shape().dim(input()->shape().num_dims() - 1); + output()->resize(output_shape); +} + +void SpaceToBatchND::execute() const +{ + switch (input()->element_type()) + { + tflite::SpaceToBatchParams op_params; + case DataType::FLOAT32: + op_params.output_offset = 0; + luci_interpreter_pal::SpaceToBatchND( + op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), + getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + op_params.output_offset = output()->zero_point(); + luci_interpreter_pal::SpaceToBatchND( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), + getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h new file mode 100644 index 000000000..0893003bb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H +#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SpaceToBatchND : public Kernel +{ +public: + SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings, + Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *block_shape() const { return _inputs[1]; } + const Tensor *paddings() const { return _inputs[2]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp new file mode 100644 index 000000000..3a8b0a812 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SpaceToBatchND.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> block_shape_shape, + std::initializer_list<int32_t> paddings_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> block_shape_data, + std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <> +void Check<uint8_t>( + 
std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape, + std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data, + std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::pair<float, int32_t> input_quant_param = + quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <typename T> class SpaceToBatchNDTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes); + +TYPED_TEST(SpaceToBatchNDTest, Simple) +{ + Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2}, + /*paddings_shape=*/{2, 2}, + /*output_shape=*/{6, 2, 2, 1}, + /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0}, + /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0}, + /*output_data=*/{0, 0, 0, -0.5, 0, 
0, 0, 0.6, 0, -1.0, 0, -0.7, + 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0}); +} + +TEST(SpaceToBatchNDTest, Invalid_Shape_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp new file mode 100644 index 000000000..7c29e8cb0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "SpaceToDepth.h" +#include "Utils.h" +#include "PALSpaceToDepth.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams ¶ms) + : KernelWithParams<SpaceToDepthParams>({input}, {output}, params) +{ +} + +void SpaceToDepth::configure() +{ + assert(input()->shape().num_dims() == 4); + assert(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 || + output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64); + assert(input()->element_type() == output()->element_type()); + + const int block_size = params().block_size; + const int32_t input_height = input()->shape().dim(1); + const int32_t input_width = input()->shape().dim(2); + int32_t output_height = input_height / block_size; + int32_t output_width = input_width / block_size; + + assert(input_height == output_height * block_size); + assert(input_width == output_width * block_size); + + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = output_height; + output_shape.dim(2) = output_width; + output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size; + + output()->resize(output_shape); +} + +void SpaceToDepth::execute() const +{ + tflite::SpaceToDepthParams op_params{}; + op_params.block_size = params().block_size; + switch (input()->element_type()) + { + case DataType::FLOAT32: + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} 
// namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h new file mode 100644 index 000000000..e66316b11 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H +#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +#include <vector> + +namespace luci_interpreter +{ +namespace kernels +{ + +class SpaceToDepth : public KernelWithParams<SpaceToDepthParams> +{ +public: + SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp new file mode 100644 index 000000000..4af488618 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SpaceToDepth.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> class SpaceToDepthTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes); + +TYPED_TEST(SpaceToDepthTest, SimpleCase) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<TypeParam>(); + std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8}; + Shape input_shape{1, 2, 2, 2}; + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8}; + std::vector<int32_t> output_shape{1, 1, 1, 8}; + Tensor output_tensor = makeOutputTensor(element_type); + + SpaceToDepthParams params{}; + params.block_size = 2; + + SpaceToDepth kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), + ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp new file mode 100644 index 000000000..1a563f307 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Split.h" + +#include "Utils.h" + +#include "PALSplit.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs) + : Kernel({axis, input}, std::move(outputs)) +{ +} + +void Split::configure() +{ + assert(axis()->shape().num_elements() == 1); + _axis_value = getTensorData<int32_t>(axis())[0]; + if (_axis_value < 0) + _axis_value += input()->shape().num_dims(); + assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims()); + + const int32_t input_size = input()->shape().dim(_axis_value); + assert(input_size % _outputs.size() == 0); + const int32_t slice_size = input_size / _outputs.size(); + + Shape output_shape = input()->shape(); + output_shape.dim(_axis_value) = slice_size; + for (Tensor *output : _outputs) + { + output->resize(output_shape); + } +} + +void Split::execute() const +{ + tflite::SplitParams params{}; + params.num_split = _outputs.size(); + params.axis = _axis_value; + +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors<scalar, false> all_outputs(_outputs); \ + luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ + all_outputs.shapes(), all_outputs.data()); \ + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_SPLIT(float); + break; + case DataType::U8: + TF_LITE_SPLIT(uint8_t); + break; + default: + throw std::runtime_error("Unsupported type."); + } +#undef TF_LITE_SPLIT +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.h b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h new file mode 100644 index 000000000..9542b1e56 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_H +#define LUCI_INTERPRETER_KERNELS_SPLIT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Split : public Kernel +{ +public: + Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs); + + const Tensor *axis() const { return _inputs[0]; } + const Tensor *input() const { return _inputs[1]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + int32_t _axis_value{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPLIT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp new file mode 100644 index 000000000..283cd9aa9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Split.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, + std::vector<std::vector<T>> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<T>(); + Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get()); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + + std::vector<Tensor> output_tensors; + output_tensors.reserve(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensors.emplace_back(makeOutputTensor(element_type)); + } + + std::vector<Tensor *> output_tensor_ptrs(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs)); + kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_splits; ++i) + { + EXPECT_THAT(extractTensorData<T>(output_tensors[i]), + ::testing::ElementsAreArray(output_data[i])); + } +} + +template <typename T> class SplitTest : public ::testing::Test 
+{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(SplitTest, DataTypes); + +TYPED_TEST(SplitTest, FourDimensional) +{ + Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, // + }); + Check<TypeParam>( + /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 9, 10, 11, 12}, // + {5, 6, 7, 8, 13, 14, 15, 16}, // + }); + Check<TypeParam>( + /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 5, 6, 9, 10, 13, 14}, // + {3, 4, 7, 8, 11, 12, 15, 16}, // + }); + Check<TypeParam>( + /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 3, 5, 7, 9, 11, 13, 15}, // + {2, 4, 6, 8, 10, 12, 14, 16}, // + }); +} + +TYPED_TEST(SplitTest, OneDimensional) +{ + Check<TypeParam>( + /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, + {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); +} + +TYPED_TEST(SplitTest, NegativeAxis) +{ + Check<TypeParam>( + /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp new file mode 100644 index 000000000..aa6820889 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SplitV.h" + +#include "Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector<Tensor *> outputs) + : Kernel({input, size_splits, axis}, std::move(outputs)) +{ +} + +void SplitV::configure() +{ + assert(axis()->shape().num_elements() == 1); + _axis_value = getTensorData<int32_t>(axis())[0]; + if (_axis_value < 0) + _axis_value += input()->shape().num_dims(); + assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims()); + + auto num_split = static_cast<int32_t>(_outputs.size()); + auto sizes_data = getTensorData<int32_t>(size_splits()); + + assert(size_splits()->shape().num_dims() == 1); + + int32_t sum = 0; + const auto num_dims_size_spits = size_splits()->shape().dim(0); + int32_t count_neg_dim = 0; + + for (int32_t i = 0; i < num_dims_size_spits - 1; ++i) + { + if (sizes_data[i] != -1) + { + sum += sizes_data[i]; + } + else + { + count_neg_dim++; + } + } + assert(count_neg_dim < 2); + assert(size_splits()->shape().num_elements() == num_split); + + auto output_shape = input()->shape(); + for (int32_t i = 0; i < num_split; ++i) + { + if (sizes_data[i] == -1) + { + output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum; + } + else + { + output_shape.dim(_axis_value) = 
sizes_data[i]; + } + _outputs[i]->resize(output_shape); + } +} + +void SplitV::execute() const +{ + tflite::SplitParams params{}; + params.num_split = _outputs.size(); + params.axis = _axis_value; + +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors<scalar, false> all_outputs(_outputs); \ + tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ + all_outputs.shapes(), all_outputs.data()); \ + } + + switch (input()->element_type()) + { + case DataType::FLOAT32: + TF_LITE_SPLIT(float); + break; + case DataType::U8: + TF_LITE_SPLIT(uint8_t); + break; + case DataType::S16: + TF_LITE_SPLIT(int16_t); + break; + default: + throw std::runtime_error("Unsupported type."); + } +#undef TF_LITE_SPLIT +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h new file mode 100644 index 000000000..92f6288fb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H +#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SplitV : public Kernel +{ +public: + SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector<Tensor *> outputs); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size_splits() const { return _inputs[1]; } + const Tensor *axis() const { return _inputs[2]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + int32_t _axis_value{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp new file mode 100644 index 000000000..035bc2122 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/SplitV.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(int axis, std::initializer_list<int32_t> splits_size, + std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data, + std::vector<std::vector<T>> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + + auto num_splits = static_cast<int32_t>(splits_size.size()); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor sizes_tensor = + makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get()); + + std::vector<Tensor> output_tensors; + output_tensors.reserve(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensors.emplace_back(makeOutputTensor(element_type)); + } + + std::vector<Tensor *> output_tensor_ptrs(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs)); + kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_splits; ++i) + { + auto tmp = extractTensorData<T>(output_tensors[i]); + EXPECT_THAT(extractTensorData<T>(output_tensors[i]), + ::testing::ElementsAreArray(output_data[i])); + } +} + +template <typename T> class SplitVTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int16_t>; +TYPED_TEST_SUITE(SplitVTest, DataTypes); + +TYPED_TEST(SplitVTest, ThreeDimensional) +{ + Check<TypeParam>( + /*axis=*/0, 
/*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 4, 5, 6, 7, 8, 9}, // + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} // + }); + Check<TypeParam>( + /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 10, 11, 12, 19, 20, 21}, // + {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} // + }); + Check<TypeParam>( + /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 4, 7, 10, 13, 16, 19, 22, 25}, // + {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} // + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp new file mode 100644 index 000000000..46e9fc9ad --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Sqrt.h" +#include "kernels/Utils.h" + +#include <stdexcept> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Sqrt::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Sqrt::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sqrt::evalFloat() const +{ + auto in = getTensorData<float>(input()); + auto out = getTensorData<float>(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = std::sqrt(*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h new file mode 100644 index 000000000..4034655ed --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H +#define LUCI_INTERPRETER_KERNELS_SQRT_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sqrt : public Kernel +{ +public: + Sqrt(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQRT_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp new file mode 100644 index 000000000..96835fbfc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Sqrt.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Sqrt kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(SqrtTest, SimpleSqrt) +{ + Check( + /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1}, + /*input_data=*/ + { + 0, 8, 2, 4, // + 3, 7, 10, 0.3, // + }, + /*output_data=*/ + { + 0.0, 2.8284271, 1.4142136, 2, // + 1.7320508, 2.6457513, 3.1622777, 0.54772256, // + }); +} + +TEST(SqrtTest, Input_Output_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Sqrt kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(SqrtTest, Invalid_Input_Type_NEG) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Sqrt kernel(&input_tensor, &output_tensor); + 
kernel.configure(); + memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp new file mode 100644 index 000000000..bc71905c1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Square.h" +#include "kernels/Utils.h" + +#include <stdexcept> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Square::configure() +{ + if (input()->element_type() != output()->element_type()) + { + throw std::runtime_error("Input/output tensor data type mismatch."); + } + output()->resize(input()->shape()); +} + +void Square::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Square::evalFloat() const +{ + auto in = getTensorData<float>(input()); + auto out = getTensorData<float>(output()); + auto size = getTensorShape(input()).FlatSize(); + for (auto i = in; i != in + size; ++i) + { + *out = (*i) * (*i); + ++out; + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.h b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h new file mode 100644 index 000000000..73ed5a707 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H +#define LUCI_INTERPRETER_KERNELS_SQUARE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Square : public Kernel +{ +public: + Square(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp new file mode 100644 index 000000000..51662dea7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Square.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(SquareTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Square kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp new file mode 100644 index 000000000..3bafeba4a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SquaredDifference.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void SquaredDifference::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void SquaredDifference::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalSquaredDifference<float>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void SquaredDifference::evalSquaredDifference() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) { + const T difference = x - y; + return difference * difference; + }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h new file mode 100644 index 000000000..9327caf93 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H +#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SquaredDifference : public Kernel +{ +public: + SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalSquaredDifference() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp new file mode 100644 index 000000000..2819c01e2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/SquaredDifference.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(SquaredDifferenceTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(SquaredDifferenceTest, FloatBroadcast) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape1{3, 1, 2}; + Shape input_shape2{1}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{1.0}; + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get()); + Tensor 
input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp new file mode 100644 index 000000000..4a75518c7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Squeeze.h" + +#include "kernels/Utils.h" + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params) + : KernelWithParams<SqueezeParams>({input}, {output}, params) +{ +} + +void Squeeze::configure() +{ + int input_num_dims = input()->shape().num_dims(); + int num_squeeze_dims = params().squeeze_dims.size(); + assert(input_num_dims <= 8); + bool should_squeeze[8] = {false}; + int num_squeezed_dims = 0; + if (num_squeeze_dims == 0) + { + for (int idx = 0; idx < input_num_dims; ++idx) + { + if (input()->shape().dim(idx) == 1) + { + should_squeeze[idx] = true; + ++num_squeezed_dims; + } + } + } + else + { + for (int idx = 0; idx < num_squeeze_dims; ++idx) + { + int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims + : params().squeeze_dims[idx]; + assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1); + if (!should_squeeze[current]) + ++num_squeezed_dims; + should_squeeze[current] = true; + } + } + Shape output_shape(input_num_dims - num_squeezed_dims); + for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx) + { + if (!should_squeeze[in_idx]) + { + output_shape.dim(out_idx++) = input()->shape().dim(in_idx); + } + } + output()->resize(output_shape); +} + +void Squeeze::execute() const +{ + assert(input()->shape().num_elements() == output()->shape().num_elements()); + + const auto *input_data = input()->data<void>(); + auto *output_data = output()->data<void>(); + std::memcpy(output_data, input_data, + getDataTypeSize(input()->element_type()) * input()->shape().num_elements()); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h new file mode 100644 index 000000000..687af5158 --- /dev/null +++
b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H +#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Squeeze : public KernelWithParams<SqueezeParams> +{ +public: + Squeeze(const Tensor *input, Tensor *output, const SqueezeParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp new file mode 100644 index 000000000..1bc0b6459 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Squeeze.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<T> input_data, std::initializer_list<T> output_data, + std::initializer_list<int32_t> squeeze_dims) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + SqueezeParams params{}; + params.squeeze_dims = squeeze_dims; + + Squeeze kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class SqueezeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(SqueezeTest, DataTypes); + +TYPED_TEST(SqueezeTest, TotalTest) +{ + Check<TypeParam>( + /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
23, 24}, + /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, + {-1, 0}); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp new file mode 100644 index 000000000..a8730d861 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/StridedSlice.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/strided_slice.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, + const Tensor *strides, Tensor *output, const StridedSliceParams ¶ms) + : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params) +{ +} + +void StridedSlice::configure() +{ + assert(begin()->shape().num_dims() == 1); + assert(end()->shape().num_dims() == 1); + assert(strides()->shape().num_dims() == 1); + assert(input()->element_type() == output()->element_type()); + assert(begin()->element_type() == DataType::S32); + assert(end()->element_type() == DataType::S32); + assert(strides()->element_type() == DataType::S32); + assert(input()->shape().num_dims() <= 4); + if (params().ellipsis_mask != 0) + { + throw std::runtime_error("ellipsis_mask is not implemented yet."); + } + if (params().new_axis_mask != 0) + { + throw std::runtime_error("new_axis_mask is not implemented yet."); + } + if (input()->element_type() == DataType::U8) + { + assert(input()->scale() == output()->scale()); + assert(input()->zero_point() == output()->zero_point()); + } + tflite::StridedSliceParams op_params{}; + op_params.start_indices_count = input()->shape().num_dims(); + op_params.stop_indices_count = input()->shape().num_dims(); + op_params.strides_count = input()->shape().num_dims(); + + for (int i = 0; i < input()->shape().num_dims(); i++) + { + op_params.start_indices[i] = getTensorData<int32_t>(begin())[i]; + op_params.stop_indices[i] = getTensorData<int32_t>(end())[i]; + op_params.strides[i] = getTensorData<int32_t>(strides())[i]; + } + op_params.begin_mask = params().begin_mask; + op_params.ellipsis_mask = 0; + op_params.end_mask = params().end_mask; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = params().shrink_axis_mask; + 
std::vector<int32_t> output_shape_vector; + for (int i = 0; i < input()->shape().num_dims(); i++) + { + int idx = input()->shape().num_dims() - i - 1; + int32_t stride = getTensorData<int32_t>(strides())[idx]; + assert(stride != 0); + int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx); + int32_t end = + ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin); + + const bool shrink_axis = params().shrink_axis_mask & (1 << idx); + if (shrink_axis) + { + end = begin + 1; + } + + int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride)); + dim_shape = dim_shape < 0 ? 0 : dim_shape; + if (!shrink_axis) + { + output_shape_vector.push_back(dim_shape); + } + } + Shape output_shape = Shape(output_shape_vector.size()); + for (size_t i = 0; i < output_shape_vector.size(); i++) + { + output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1]; + } + output()->resize(output_shape); +} + +void StridedSlice::execute() const +{ + tflite::StridedSliceParams op_params{}; + op_params.start_indices_count = input()->shape().num_dims(); + op_params.stop_indices_count = input()->shape().num_dims(); + op_params.strides_count = input()->shape().num_dims(); + + for (int i = 0; i < input()->shape().num_dims(); i++) + { + op_params.start_indices[i] = getTensorData<int32_t>(begin())[i]; + op_params.stop_indices[i] = getTensorData<int32_t>(end())[i]; + op_params.strides[i] = getTensorData<int32_t>(strides())[i]; + } + op_params.begin_mask = params().begin_mask; + op_params.ellipsis_mask = 0; + op_params.end_mask = params().end_mask; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = params().shrink_axis_mask; + + switch (input()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + 
tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + case DataType::S32: + tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()), + getTensorData<int32_t>(input()), getTensorShape(output()), + getTensorData<int32_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h new file mode 100644 index 000000000..fc96893a7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H +#define LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class StridedSlice : public KernelWithParams<StridedSliceParams> +{ +public: + StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides, + Tensor *output, const StridedSliceParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *begin() const { return _inputs[1]; } + const Tensor *end() const { return _inputs[2]; } + const Tensor *strides() const { return _inputs[3]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp new file mode 100644 index 000000000..399cdebed --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/StridedSlice.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(StridedSliceTest, Float) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape{2, 3, 2}; + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Shape begin_shape{3}; + std::vector<int32_t> begin_data{0, 0, 0}; + Shape end_shape{3}; + std::vector<int32_t> end_data{1, 3, 2}; + Shape strides_shape{3}; + std::vector<int32_t> strides_data{1, 1, 1}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.ellipsis_mask = 0; + params.new_axis_mask = 0; + params.shrink_axis_mask = 1; + + StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> output_shape{3, 2}; + std::vector<float> output_data{1, 2, 3, 4, 5, 6}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(StridedSliceTest, Uint8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Shape input_shape{2, 3, 2}; + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12}; + Shape begin_shape{3}; + std::vector<int32_t> begin_data{0, 0, 0}; + Shape end_shape{3}; + std::vector<int32_t> end_data{1, 3, 2}; + Shape strides_shape{3}; + std::vector<int32_t> strides_data{1, 1, 1}; + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0); + + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.ellipsis_mask = 0; + params.new_axis_mask = 0; + params.shrink_axis_mask = 1; + + StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, + params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<int32_t> output_shape{3, 2}; + std::vector<float> output_data{1, 2, 3, 4, 5, 6}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp new file mode 100644 index 000000000..24b6a72e5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sub.h" +#include "kernels/Utils.h" + +#include "PALSub.h" + +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms) + : KernelWithParams<SubParams>({input1, input2}, {output}, params) +{ +} + +void Sub::configure() +{ + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type())) + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type())) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Sub::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::S64: + evalInteger<int64_t>(); + break; + case DataType::S32: + evalInteger<int32_t>(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sub::evalFloat() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<float>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), 
getTensorData<float>(output())); + } + else + { + luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +template <typename T> void Sub::evalInteger() const +{ + tflite::ArithmeticParams params{}; + fillArithmeticActivationRange<T>(params, _params.activation); + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()), + getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +void Sub::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const int left_shift = 20; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + 
quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + params.left_shift = left_shift; + // The kernel expects inputs' zero points to be negated. + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input1_multiplier = input1_multiplier; + params.input1_shift = input1_shift; + params.input2_offset = -input2()->zero_point(); // Note the '-'. + params.input2_multiplier = input2_multiplier; + params.input2_shift = input2_shift; + params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()), + getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h new file mode 100644 index 000000000..23952b3bd --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_SUB_H +#define LUCI_INTERPRETER_KERNELS_SUB_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sub : public KernelWithParams<SubParams> +{ +public: + Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + template <typename T> void evalInteger() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SUB_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp new file mode 100644 index 000000000..9abafd49a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sub.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +#include <algorithm> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; +using std::pair; +using std::vector; +using std::transform; +using std::initializer_list; + +class SubTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +// for quantized Add, the error shouldn't exceed step +float GetTolerance(float min, float max) +{ + float kQuantizedStep = (max - min) / 255.0; + return kQuantizedStep; +} + +TEST_F(SubTest, Uint8) +{ + Shape base_shape = {2, 3, 1, 2}; + vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector<vector<float>> output_data = { + {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f, + 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f}, + {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f}, + {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 
0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f}, + {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}}; + + float kQuantizedTolerance = GetTolerance(-3.f, 3.f); + pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); + for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } + + // Inversion step for output_data, because subtract is not commutative operation + auto multiply = [](auto &i) { + transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; }); + }; + for_each(output_data.begin(), output_data.end(), multiply); + + // Re-run with exchanged inputs. 
+ for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST_F(SubTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector<vector<float>> test_outputs = { + {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f, + 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}}; + + vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < 
test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager) +{ + using dtype = typename loco::DataTypeImpl<DType>::Type; + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<dtype>> test_outputs = { + {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0, + 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0}, + {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0}, + {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0, + 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0}, + {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}}; + std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1}; + std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager); + Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager); + Tensor output_tensor = makeOutputTensor(DType); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + 
kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i]) + << "With shape number " << i; + } +}; + +TEST_F(SubTest, SInt32) +{ + CheckInteger<loco::DataType::S32>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(SubTest, SInt64) +{ + CheckInteger<loco::DataType::S64>(_memory_manager.get()); + SUCCEED(); +} + +TEST_F(SubTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST_F(SubTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U64); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(SubTest, Mismatching_Input_Int_Types_NEG) +{ + Tensor input1_tensor = 
makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp new file mode 100644 index 000000000..c4fa16912 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Tanh.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/tanh.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Tanh::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + populateLookupTable(); + } + output()->resize(input()->shape()); +} + +void Tanh::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Tanh::evalFloat() const +{ + tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +void Tanh::evalQuantized() const +{ + const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output())); + uint8_t *output_data = getTensorData<uint8_t>(output()); + const uint8_t *input_data = getTensorData<uint8_t>(input()); + for (int i = 0; i < size; ++i) + { + output_data[i] = getTableValue(input_data[i]); + } +} + +void Tanh::populateLookupTable() +{ + const auto input_scale = static_cast<double>(input()->scale()); + const auto input_zero_point = static_cast<int32_t>(input()->zero_point()); + const auto output_scale = static_cast<double>(output()->scale()); + const auto output_zero_point = static_cast<int32_t>(output()->zero_point()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + const float transformed = std::tanh(dequantized); + const float rescaled = std::round(transformed * inverse_scale); + 
const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)), + static_cast<uint8_t>(val)); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h new file mode 100644 index 000000000..8017c9638 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_TANH_H +#define LUCI_INTERPRETER_KERNELS_TANH_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Tanh : public Kernel +{ +public: + Tanh(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void populateLookupTable(); + void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }; + uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }; + +private: + uint8_t _table[256]{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TANH_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp new file mode 100644 index 000000000..bfae479a9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Tanh.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +class TanhTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(TanhTest, Float) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tanh kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 0, -0.9999877, 0.9640275, 0.999329, // + 0.99505475, -0.9640275, 1, 0.7615941, // + }; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(TanhTest, Uint8) +{ + float kMin = -1; + float kMax = 127.f / 128.f; + float kTanhTolerance = 2 * (1. 
/ 256); + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax); + std::vector<float> input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + Tanh kernel(&input_tensor, &output_tensor); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> ref_output_data{ + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + }; + std::vector<int32_t> ref_output_shape{2, 6, 4, 1}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(TanhTest, InputTypeInvalid_NEG) +{ + std::vector<int64_t> input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor 
input_tensor = + makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Tanh kernel(&input_tensor, &output_tensor); + _memory_manager->allocate_memory(output_tensor); + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST_F(TanhTest, InputOutputMismatch_NEG) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Tanh kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp new file mode 100644 index 000000000..4d983adda --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/TestUtils.h" + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace testing +{ + +using ::testing::FloatNear; +using ::testing::Matcher; + +Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); } + +Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point) +{ + return Tensor(element_type, {}, {{scale}, {zero_point}}, ""); +} + +std::vector<float> dequantizeTensorData(const Tensor &tensor) +{ + if (tensor.element_type() == DataType::U8) + { + std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor); + return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } + if (tensor.element_type() == DataType::S8) + { + std::vector<int8_t> data = extractTensorData<int8_t>(tensor); + return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } + else if (tensor.element_type() == DataType::S16) + { + // S16 quantization is symmetric, so zero point should be zero. 
+ for (auto zp : tensor.zero_points()) + { + (void)zp; + assert(zp == 0); + } + + std::vector<int16_t> data = extractTensorData<int16_t>(tensor); + if (tensor.scales().size() == 1) + { + return dequantize(data.data(), data.size(), tensor.scale(), 0); + } + + // quantize_dimension breaks shape into two parts: + // inner dimensions that contains continuous data with one quantization type + // outer dimensions that contains other dimensions + const Shape shape = tensor.shape(); + const int32_t quantized_dimension = tensor.quantized_dimension(); + assert(quantized_dimension < shape.num_dims()); + size_t outer_dims_size = 1; + int32_t quant_dim_size = shape.dim(quantized_dimension); + size_t inner_dims_size = 1; + assert(quant_dim_size == tensor.scales().size()); + + for (int i = 0; i < quantized_dimension; ++i) + outer_dims_size *= shape.dim(i); + for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i) + inner_dims_size *= shape.dim(i); + + assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size); + + std::vector<float> dequantized_data; + dequantized_data.reserve(shape.num_elements()); + for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it) + for (int32_t channel = 0; channel < quant_dim_size; ++channel) + { + float scale = tensor.scales()[channel]; + size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel); + std::vector<float> part_dequantized_data = + dequantize(data.data() + offset, inner_dims_size, scale, 0); + dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(), + part_dequantized_data.end()); + } + return dequantized_data; + } + else + { + throw std::runtime_error("Unsupported type."); + } +} + +Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error) +{ + std::vector<Matcher<float>> matchers; + matchers.reserve(values.size()); + for (const float v : values) + { + matchers.emplace_back(FloatNear(v, max_abs_error)); + } + return 
ElementsAreArray(matchers); +} + +std::vector<int32_t> extractTensorShape(const Tensor &tensor) +{ + std::vector<int32_t> result; + int dims = tensor.shape().num_dims(); + for (int i = 0; i < dims; i++) + { + result.push_back(tensor.shape().dim(i)); + } + return result; +} + +} // namespace testing +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h new file mode 100644 index 000000000..1f5a0c308 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H +#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H + +#include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" + +#include <type_traits> + +#include <gtest/gtest.h> +#include <gmock/gmock.h> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace testing +{ + +template <typename T> +std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point); + +template <DataType DT> +Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data, + IMemoryManager *memory_manager) +{ + Tensor tensor(DT, shape, {}, ""); + memory_manager->allocate_memory(tensor); + tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type)); + return tensor; +} + +/** + * @brief Create layer-wise quantized tensor + * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64 + * @param shape desired tensor shape + * @param scale scale of quantized number + * @param zero_point zero point of quantized number, should be 0 for signed datatypes + * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor + * @return created tensor + */ +template <DataType DT> +Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point, + const std::vector<float> &data, IMemoryManager *memory_manager) +{ + using NativeT = typename DataTypeImpl<DT>::Type; + Tensor tensor(DT, shape, {{scale}, {zero_point}}, ""); + std::vector<NativeT> quantized_data = + quantize<NativeT>(data.data(), data.size(), scale, zero_point); + memory_manager->allocate_memory(tensor); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + +/** + * @brief Create channel-wise quantized tensor + * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64 + * @param shape desired 
tensor shape + * @param scales scales of quantized number + * @param zero_points zero points of quantized number, should be 0 for signed datatypes + * @param quantize_dimension dimension to apply quantization along. Usually channels/output channels + * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor + * @return created tensor + */ +template <DataType DT> +Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales, + const std::vector<int32_t> &zero_points, int quantized_dimension, + const std::vector<float> &data, IMemoryManager *memory_manager) +{ + using NativeT = typename DataTypeImpl<DT>::Type; + assert(quantized_dimension < shape.num_dims()); + Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, ""); + + // quantize_dimension breaks shape into two parts: + // inner dimensions that contains continuous data with one quantization type + // outer dimensions that contains other dimensions + size_t outer_dims_size = 1; + int32_t quant_dim_size = shape.dim(quantized_dimension); + size_t inner_dims_size = 1; + assert(quant_dim_size == scales.size()); + assert(quant_dim_size == zero_points.size()); + + for (int i = 0; i < quantized_dimension; ++i) + outer_dims_size *= shape.dim(i); + for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i) + inner_dims_size *= shape.dim(i); + + assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size); + + std::vector<NativeT> quantized_data; + quantized_data.reserve(shape.num_elements()); + for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it) + for (int32_t channel = 0; channel < quant_dim_size; ++channel) + { + int32_t zero_point = zero_points[channel]; + float scale = scales[channel]; + size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel); + std::vector<NativeT> part_quantized_data = + quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point); + 
quantized_data.insert(quantized_data.end(), part_quantized_data.begin(), + part_quantized_data.end()); + } + assert(quantized_data.size() == shape.num_elements()); + memory_manager->allocate_memory(tensor); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + +Tensor makeOutputTensor(DataType element_type); +Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point); + +std::vector<int32_t> extractTensorShape(const Tensor &tensor); + +// Returns the corresponding DataType given the type T. +template <typename T> constexpr DataType getElementType() +{ + if (std::is_same<T, float>::value) + return DataType::FLOAT32; + if (std::is_same<T, double>::value) + return DataType::FLOAT64; + if (std::is_same<T, uint8_t>::value) + return DataType::U8; + if (std::is_same<T, uint16_t>::value) + return DataType::U16; + if (std::is_same<T, uint32_t>::value) + return DataType::U32; + if (std::is_same<T, uint64_t>::value) + return DataType::U64; + if (std::is_same<T, int8_t>::value) + return DataType::S8; + if (std::is_same<T, int16_t>::value) + return DataType::S16; + if (std::is_same<T, int32_t>::value) + return DataType::S32; + if (std::is_same<T, int64_t>::value) + return DataType::S64; + if (std::is_same<T, bool>::value) + return DataType::BOOL; + return DataType::Unknown; +} + +template <typename T> std::vector<T> extractTensorData(const Tensor &tensor) +{ + const auto *data_ptr = tensor.data<T>(); + return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements()); +} + +std::vector<float> dequantizeTensorData(const Tensor &tensor); + +// Array version of `::testing::FloatNear` matcher. 
+::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, + float max_abs_error = 1.0e-5f); + +template <typename T> +std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point) +{ + static_assert(std::is_integral<T>::value, "Integral type expected."); + + float q_min{}, q_max{}; + if (std::is_signed<T>::value) + { + q_min = -std::numeric_limits<T>::max(); + q_max = std::numeric_limits<T>::max(); + } + else + { + q_min = 0; + q_max = std::numeric_limits<T>::max(); + } + + std::vector<T> q; + for (size_t i = 0; i < num_elements; ++i) + { + const auto &f = data[i]; + q.push_back(static_cast<T>( + std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale)))))); + } + return q; +} + +template <typename T> +std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point) +{ + static_assert(std::is_integral<T>::value, "Integral type expected."); + std::vector<float> f; + for (size_t i = 0; i < num_elements; ++i) + { + const T &q = data[i]; + f.push_back(scale * (q - zero_point)); + } + return f; +} + +// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned). +template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max) +{ + static_assert(std::is_integral<T>::value, "Integral type expected."); + int32_t zero_point = 0; + float scale = 0; + const T qmin = std::numeric_limits<T>::lowest(); + const T qmax = std::numeric_limits<T>::max(); + const float qmin_double = qmin; + const float qmax_double = qmax; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + assert(f_max >= 0); + assert(f_min <= 0); + if (f_min == f_max) + { + // Special case where the min,max range is a point. Should be {0}. + assert(f_max == 0); + assert(f_min == 0); + return {scale, zero_point}; + } + + // General case. + // + // First determine the scale. 
+ scale = (f_max - f_min) / (qmax_double - qmin_double); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. + const float zero_point_from_min = qmin_double - f_min / scale; + const float zero_point_from_max = qmax_double - f_max / scale; + + const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale); + + const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale); + + const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + + T nudged_zero_point = 0; + if (zero_point_double < qmin_double) + { + nudged_zero_point = qmin; + } + else if (zero_point_double > qmax_double) + { + nudged_zero_point = qmax; + } + else + { + nudged_zero_point = static_cast<T>(std::round(zero_point_double)); + } + + // The zero point should always be in the range of quantized value, + // // [qmin, qmax]. 
+ assert(qmax >= nudged_zero_point); + assert(qmin <= nudged_zero_point); + zero_point = nudged_zero_point; + // finally, return the values + return {scale, zero_point}; +} + +inline float getTolerance(float min, float max, int quantize_steps) +{ + return ((max - min) / quantize_steps); +} + +} // namespace testing +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp new file mode 100644 index 000000000..802d87295 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Transpose.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/transpose.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output) + : Kernel({input, perm}, {output}) +{ +} + +void Transpose::configure() +{ + // Transpose op only supports 1D-4D input arrays. 
+ int dims = input()->shape().num_dims(); + const int32_t *perm_data = getTensorData<int32_t>(perm()); + + assert(input()->shape().num_dims() <= 4); + assert(input()->element_type() == output()->element_type()); + + assert(perm()->shape().num_dims() == 1); + assert(perm()->shape().dim(0) == dims); + + Shape output_shape(dims); + for (int i = 0; i < dims; i++) + { + assert(perm_data[i] < dims && perm_data[i] >= 0); + output_shape.dim(i) = input()->shape().dim(perm_data[i]); + } + + output()->resize(output_shape); +} + +void Transpose::execute() const +{ + tflite::TransposeParams params{}; + const int32_t *perm_data = getTensorData<int32_t>(perm()); + const int32_t size = perm()->shape().dim(0); + params.perm_count = size; + for (int i = 0; i < size; i++) + params.perm[i] = perm_data[i]; + switch (input()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::Transpose(params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); + break; + case DataType::U8: + tflite::reference_ops::Transpose(params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h new file mode 100644 index 000000000..d6f89c352 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSE_H +#define LUCI_INTERPRETER_KERNELS_TRANSPOSE_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Transpose : public Kernel +{ +public: + Transpose(const Tensor *input, const Tensor *perm, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *perm() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp new file mode 100644 index 000000000..43be8f8b9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Transpose.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, + std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(element_type); + + Transpose kernel(&input_tensor, &perm_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +template <typename T> class TransposeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(TransposeTest, DataTypes); + +TYPED_TEST(TransposeTest, Small3D) +{ + Check<TypeParam>(/*input_shape=*/{2, 3, 4}, /*perm_shape=*/{3}, /*output_shape=*/{4, 2, 3}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, + /*perm_data=*/{2, 0, 1}, + /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21, + 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}); +} + +TYPED_TEST(TransposeTest, Large4D) +{ + Check<TypeParam>( + /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5}, + 
/*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, + /*perm_data=*/{2, 0, 1, 3}, + /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44, + 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104, + 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49, + 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109, + 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, + 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, + 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, + 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}); +} + +TYPED_TEST(TransposeTest, Large2D) +{ + Check<TypeParam>( + /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10}, + /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, + /*perm_data=*/{1, 0}, + /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, + 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110, + 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52, + 64, 76, 88, 100, 112, 5, 17, 29, 
41, 53, 65, 77, 89, 101, 113, + 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, + 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, + 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58, + 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119}); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp new file mode 100644 index 000000000..1b5f9d941 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/TransposeConv.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams &params) + : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, + {output, scratch_tensor}, params) +{ +} + +TransposeConv::~TransposeConv() +{ + // Define destructor here, to delete vector of qunatized multipliers properly +} + +void TransposeConv::configure() +{ + assert(output_shape()->shape().num_dims() == 1); + assert(input()->shape().num_dims() == 4); + assert(filter()->shape().num_dims() == 4); + assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 || + input()->element_type() == DataType::S16); + assert(input()->element_type() == output()->element_type()); + assert(input()->shape().dim(3) == filter()->shape().dim(3)); + + const int num_dims = output_shape()->shape().dim(0); + Shape out_shape(num_dims); + const auto *shape_data = getTensorData<int32_t>(output_shape()); + for (int i = 0; i < num_dims; i++) + out_shape.dim(i) = shape_data[i]; + output()->resize(out_shape); + + const int32_t filter_height = filter()->shape().dim(1); + const int32_t filter_width = filter()->shape().dim(2); + const int32_t output_height = out_shape.dim(1); + const int32_t output_width = out_shape.dim(2); + + const int32_t unused_output_height = + computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1); + const int32_t unused_output_width = + computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1); + + _padding_height = + computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height); + _padding_width = + 
computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width); + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->resize(output()->shape()); + const std::vector<double> real_multipliers = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + _quant_multipliers = quantizeMultipliers(real_multipliers); + } + else + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->set_allocatable(false); + } +} + +void TransposeConv::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void TransposeConv::evalFloat() const +{ + tflite::ConvParams op_params{}; + op_params.padding_type = tflite::PaddingType::kSame; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; + op_params.stride_height = params().stride_height; + op_params.stride_width = params().stride_width; + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData<float>(input()), // + getTensorShape(filter()), getTensorData<float>(filter()), // + getTensorShape(bias()), getTensorData<float>(bias()), // + getTensorShape(output()), getTensorData<float>(output()), // + tflite::RuntimeShape(), nullptr); +} + +void TransposeConv::evalQuantized() const +{ + tflite::ConvParams op_params{}; + op_params.padding_type = tflite::PaddingType::kSame; 
+ op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; + op_params.stride_height = params().stride_height; + op_params.stride_width = params().stride_width; + // The kernel expects input and filter zero points to be negated. + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.weights_offset = -filter()->zero_point(); // Note the '-'. + op_params.output_offset = output()->zero_point(); + op_params.output_multiplier = _quant_multipliers[0].multiplier; + op_params.output_shift = _quant_multipliers[0].shift; + op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min(); + op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max(); + + auto scratch_tensor = getOutputTensors()[1]; + + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData<uint8>(input()), // + getTensorShape(filter()), getTensorData<uint8>(filter()), // + getTensorShape(bias()), getTensorData<int32_t>(bias()), // + getTensorShape(output()), getTensorData<uint8>(output()), // + tflite::RuntimeShape(), nullptr, // + getTensorData<int32_t>(scratch_tensor)); +} + +void TransposeConv::evalQuantizedPerChannel() const +{ + const auto *input_data = getTensorData<uint8_t>(input()); + const auto *filter_data = getTensorData<uint8_t>(filter()); + const auto *bias_data = getTensorData<int32_t>(bias()); + auto *output_data = getTensorData<uint8_t>(output()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData<int32_t>(scratch_tensor); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t 
filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t)); + + BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const uint8_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const uint8_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast<int32_t>(input_val - input()->zero_point()) * + static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; 
++out_c) + { + int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc += output()->zero_point(); + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +void TransposeConv::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + const auto *filter_data = getTensorData<int16_t>(filter()); + const auto *bias_data = getTensorData<int64_t>(bias()); + auto *output_data = getTensorData<int16_t>(output()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData<int64_t>(scratch_tensor); + + const Shape &input_shape = input()->shape(); + const Shape &filter_shape = filter()->shape(); + const Shape &output_shape = output()->shape(); + + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t input_depth = input_shape.dim(3); + const int32_t output_depth = filter_shape.dim(0); + const int32_t filter_height = filter_shape.dim(1); + const int32_t filter_width = filter_shape.dim(2); + const int32_t output_height = output_shape.dim(1); + const int32_t output_width = output_shape.dim(2); + + const int32_t stride_height = _params.stride_height; + const int32_t stride_width = _params.stride_width; + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); + + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t)); + + BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); 
+ for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + for (int32_t in_c = 0; in_c < input_depth; ++in_c) + { + const int32_t out_y_origin = in_y * stride_height - _padding_height; + const int32_t out_x_origin = in_x * stride_width - _padding_width; + for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int32_t out_x = out_x_origin + filter_x; + const int32_t out_y = out_y_origin + filter_y; + if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width)) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + const int16_t input_val = + input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)]; + const int16_t filter_val = + filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)]; + scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] += + static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val); + } + } + } + } + } + } + } + for (int32_t out_y = 0; out_y < output_height; ++out_y) + { + for (int32_t out_x = 0; out_x < output_width; ++out_x) + { + for (int32_t out_c = 0; out_c < output_depth; ++out_c) + { + int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)]; + if (bias_data) + { + acc += bias_data[out_c]; + } + int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier( + acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift); + + scaled_acc = std::max(scaled_acc, activation_min); + scaled_acc = std::min(scaled_acc, activation_max); + + output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc; + } + } + } + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h 
new file mode 100644 index 000000000..cea0cf3c7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H +#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ChannelQuantMultipliers; + +class TransposeConv : public KernelWithParams<TransposeConvParams> +{ +public: + TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams &params); + + ~TransposeConv(); + + const Tensor *output_shape() const { return _inputs[0]; } + const Tensor *filter() const { return _inputs[1]; } + const Tensor *input() const { return _inputs[2]; } + const Tensor *bias() const { return _inputs[3]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedPerChannel() const; + void evalQuantizedS16() const; + +private: + int32_t _padding_height{}; + int32_t _padding_width{}; + // The scaling factor from input to output (aka the 'real multiplier') can + // be represented as a fixed point 
multiplier plus a left shift. + std::vector<ChannelQuantMultipliers> _quant_multipliers; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp new file mode 100644 index 000000000..4856e1b87 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/TransposeConv.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T, typename B> +void Check(std::initializer_list<int32_t> output_shape_shape, + std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data, + std::initializer_list<T> input_data, std::initializer_list<B> bias_data, + std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height, + int32_t stride_width) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + constexpr DataType element_type = getElementType<T>(); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get()); + Tensor weight_tensor = + makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get()); + Tensor input_data_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + + DataType scratch_data_type = element_type == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + Tensor output_tensor = makeOutputTensor(element_type); + + TransposeConvParams params{}; + params.padding = padding; + params.stride_height = stride_height; + params.stride_width = stride_width; + + if (bias_data.size() != 0) + { + Tensor bias_tensor = + makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get()); + TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + } + else + { + TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + } + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); +} + +TEST(TransposeConvTest, FloatSimple) +{ + Check<float, float>( + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, + /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*bias_data=*/{}, + /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); + + SUCCEED(); +} + +TEST(TransposeConvTest, FloatTwoFiltersTest) +{ + Check<float, float>( + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, + /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, + /*bias_data=*/{}, + /*output_data=*/ + {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); + + SUCCEED(); +} + +TEST(TransposeConvTest, SimpleBiasTest) +{ + Check<float, float>( + /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, + /*input_shape=*/{1, 2, 2, 1}, + /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2}, + /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}, + /*input_data=*/{1, 2, 3, 4}, + /*bias_data=*/{3, 4}, + /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21, + 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34, + 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, + /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2); + + SUCCEED(); +} + +TEST(TransposeConvTest, UInt8) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. 
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96 + auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>( + {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, + 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, UInt8_CWQ) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + const int32_t output_channels = 2; + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + 
std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. + auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17)); + filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>( + {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, SInt16) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(TransposeConvTest, SInt16_CWQ_weights) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + const int output_channels = 2; + const Shape input_shape{1, 2, 2, 1}; + const Shape filter_shape{output_channels, 3, 3, 1}; + const Shape bias_shape{output_channels}; + std::vector<int32_t> output_shape_data{1, 5, 5, output_channels}; + + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + const float input_scale = 0.25; + const float output_scale = 0.5; + const std::vector<float> filter_scales{0.2f, 0.5f}; + std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale}; + const std::vector<int32_t> zerop(2, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, + 
filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, &scratch_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp new file mode 100644 index 000000000..9127241c0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Unpack.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams ¶ms) + : KernelWithParams<UnpackParams>({input}, std::move(outputs), params) +{ +} + +void Unpack::configure() +{ + const Shape &input_shape = input()->shape(); + + int axis = _params.axis; + if (axis < 0) + axis += input()->shape().num_dims(); + assert(axis >= 0 && axis < input_shape.num_dims()); + + Shape output_shape(input_shape.num_dims() - 1); + int out_index = 0; + for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index) + { + if (in_index != axis) + output_shape.dim(out_index++) = input_shape.dim(in_index); + } + + for (Tensor *output : _outputs) + { + assert(output->element_type() == input()->element_type()); + output->resize(output_shape); + } +} + +template <typename T> void Unpack::executeImpl() const +{ + tflite::UnpackParams params{}; + params.axis = _params.axis; + params.num_split = _outputs.size(); + VectorOfTensors<T, false> all_outputs(_outputs); + tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()), + **all_outputs.shapes(), all_outputs.data()); +} + +void Unpack::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + return executeImpl<float>(); + case DataType::U8: + return executeImpl<uint8_t>(); + default: + throw 
std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h new file mode 100644 index 000000000..f4a44ecad --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H +#define LUCI_INTERPRETER_KERNELS_UNPACK_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Unpack : public KernelWithParams<UnpackParams> +{ +public: + Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void executeImpl() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp new file mode 100644 index 000000000..9384ddc83 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Unpack.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(int axis, Shape input_shape, std::initializer_list<T> input_data, + const std::vector<std::initializer_list<int32_t>> &exp_output_shape, + std::vector<std::initializer_list<T>> exp_output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis); + + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + std::vector<Tensor> output_tensors; + output_tensors.reserve(num_outputs); + for (int i = 0; i < num_outputs; ++i) + { + output_tensors.push_back(makeOutputTensor(element_type)); + } + + std::vector<Tensor *> output_tensor_ptrs(num_outputs); + for (int i = 0; i < num_outputs; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + UnpackParams params{}; + params.axis = axis; + + Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params); + kernel.configure(); + for (int i = 0; i < num_outputs; i++) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_outputs; ++i) + { + EXPECT_THAT(extractTensorData<T>(output_tensors[i]), + ::testing::ElementsAreArray(exp_output_data[i])); + } +} + +template <typename T> class UnpackTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_SUITE(UnpackTest, DataTypes); + +TYPED_TEST(UnpackTest, ThreeOutputs) +{ + Check<TypeParam>(/*axis=*/0, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{2}, {2}, {2}}, + /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}}); +} + +TYPED_TEST(UnpackTest, 
ThreeOutputsAxisOne) +{ + Check<TypeParam>(/*axis=*/1, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{3}, {3}}, + /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne) +{ + Check<TypeParam>(/*axis=*/-1, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{3}, {3}}, + /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo) +{ + Check<TypeParam>(/*axis=*/-2, /*input_shape=*/{3, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{2}, {2}, {2}}, + /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}}); +} + +TYPED_TEST(UnpackTest, OneOutput) +{ + Check<TypeParam>(/*axis=*/0, /*input_shape=*/{1, 6}, + /*input_data=*/{1, 2, 3, 4, 5, 6}, + /*exp_output_shape=*/{{6}}, + /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}}); +} + +TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs) +{ + Check<TypeParam>(/*axis=*/2, /*input_shape=*/{2, 2, 2}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8}, + /*exp_output_shape=*/{{2, 2}, {2, 2}}, + /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}}); +} + +TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs) +{ + Check<TypeParam>( + /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1}, + /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}}, + /*exp_output_data=*/ + {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}}); +} + +TYPED_TEST(UnpackTest, VectorToScalar) +{ + Check<TypeParam>(/*axis=*/0, /*input_shape=*/{5}, + /*input_data=*/{1, 2, 3, 4, 5}, + /*exp_output_shape=*/{{}, {}, {}, {}, {}}, + /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}}); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp new file mode 100644 index 000000000..5d8e5db83 --- /dev/null +++ 
b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Utils.h" + +#include <cassert> +#include <cmath> +#include <limits> +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +template <typename T> +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max) +{ + switch (activation) + { + case Activation::NONE: + *activation_min = std::numeric_limits<T>::lowest(); + *activation_max = std::numeric_limits<T>::max(); + break; + case Activation::RELU: + *activation_min = 0; + *activation_max = std::numeric_limits<T>::max(); + break; + case Activation::RELU_N1_TO_1: + *activation_min = -1; + *activation_max = 1; + break; + case Activation::RELU6: + *activation_min = 0; + *activation_max = 6; + break; + default: + throw std::runtime_error("Unsupported activation."); + } +} + +template void calculateActivationRange(Activation activation, float *activation_min, + float *activation_max); +template void calculateActivationRange(Activation activation, int32_t *activation_min, + int32_t *activation_max); +template void calculateActivationRange(Activation activation, int64_t *activation_min, + int64_t *activation_max); + +static void 
calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax, + const Tensor *output, int32_t *activation_min, + int32_t *activation_max) +{ + const float scale = output->scale(); + const int32_t zero_point = output->zero_point(); + + auto quantize = [scale, zero_point](float x) { + return zero_point + static_cast<int32_t>(std::round(x / scale)); + }; + + switch (activation) + { + case Activation::NONE: + case Activation::TANH: + *activation_min = qmin; + *activation_max = qmax; + break; + case Activation::RELU: + *activation_min = std::max(qmin, quantize(0.0f)); + *activation_max = qmax; + break; + case Activation::RELU_N1_TO_1: + *activation_min = std::max(qmin, quantize(-1.0f)); + *activation_max = std::min(qmax, quantize(1.0f)); + break; + case Activation::RELU6: + *activation_min = std::max(qmin, quantize(0.0f)); + *activation_max = std::min(qmax, quantize(6.0f)); + break; + default: + throw std::runtime_error("Unsupported activation."); + } +} + +void calculateActivationRangeQuantized(Activation activation, const Tensor *output, + int32_t *activation_min, int32_t *activation_max) +{ + assert(output->zero_points().size() == 1); + int32_t qmin{}; + int32_t qmax{}; + switch (output->element_type()) + { + case DataType::U8: + qmin = 0; + qmax = std::numeric_limits<uint8_t>::max(); + break; + case DataType::S8: + qmin = -std::numeric_limits<int8_t>::max(); + qmax = std::numeric_limits<int8_t>::max(); + break; + case DataType::S16: + // For now, assume that signed int16 type implies signed symmetric quantization. 
+ assert(output->zero_point() == 0); + qmin = -std::numeric_limits<int16_t>::max(); + qmax = std::numeric_limits<int16_t>::max(); + break; + default: + throw std::runtime_error("Unsupported type."); + } + + calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min, + activation_max); +} + +void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) +{ + if (double_multiplier == 0.0) + { + *quantized_multiplier = 0; + *shift = 0; + return; + } + + const double q = std::frexp(double_multiplier, shift); + auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31))); + + if (q_fixed == (INT64_C(1) << 31)) + { + q_fixed /= 2; + ++*shift; + } + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + // A shift amount smaller than -31 would cause all bits to be shifted out + // and thus all results would be zero. We implement that instead with + // q_fixed==0, so as to avoid hitting issues with right-shift + // operations with shift amounts greater than 31. Note that this happens + // roughly when abs(double_multiplier) < 2^-31 and the present handling means + // that we're effectively flushing tiny double_multiplier's to zero. + // We could conceivably handle values in the range (roughly) [32, 63] + // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view + // the present handling is just doing 'flush denormals to zero'. We could + // reconsider and actually generate nonzero denormals if a need arises. 
+ if (*shift < -31) + { + *shift = 0; + q_fixed = 0; + } + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} + +void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift) +{ + assert(double_multiplier < 1.0); + assert(double_multiplier > 0.0); + int shift; + quantizeMultiplier(double_multiplier, quantized_multiplier, &shift); + assert(shift <= 0); + *left_shift = shift; +} + +Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape) +{ + const int num_input1_dims = input1_shape.num_dims(); + const int num_input2_dims = input2_shape.num_dims(); + const int num_out_dims = std::max(num_input1_dims, num_input2_dims); + Shape output_shape(num_out_dims); + + for (int i = 0; i < num_out_dims; ++i) + { + const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1; + const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1; + + bool need_broadcast = input1_dim != input2_dim; + bool can_broadcast = input1_dim == 1 || input2_dim == 1; + LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast); + + output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim); + } + + return output_shape; +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h new file mode 100644 index 000000000..ebeb20e66 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H +#define LUCI_INTERPRETER_KERNELS_UTILS_H + +#include "core/KernelParams.h" +#include "luci_interpreter/core/Tensor.h" + +#include <tensorflow/lite/kernels/internal/types.h> + +#include <cassert> +#include <cstdint> +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +#define LUCI_INTERPRETER_CHECK(cond) \ + if (!(cond)) \ + throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + +"(" + \ + std::string(#cond) + ") was not true."); + +inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, + int32_t filter_size, int32_t out_size) +{ + const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; + const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2; + return padding > 0 ? padding : 0; +} + +inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size, + int32_t filter_size, int32_t out_size, int32_t *offset) +{ + int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; + int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size); + total_padding = total_padding > 0 ? 
total_padding : 0; + *offset = total_padding % 2; + return total_padding / 2; +} + +inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, + int32_t stride, int32_t dilation_rate = 1) +{ + const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1; + switch (padding) + { + case Padding::SAME: + return (image_size + stride - 1) / stride; + case Padding::VALID: + return (image_size + stride - effective_filter_size) / stride; + default: + assert(false); + return 0; + } +} + +inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3) +{ + return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3; +} + +template <typename T> +void calculateActivationRange(Activation activation, T *activation_min, T *activation_max); + +void calculateActivationRangeQuantized(Activation activation, const Tensor *output, + int32_t *activation_min, int32_t *activation_max); + +template <typename T> constexpr bool one_of_types() { return false; } + +// Checks if T is equal to one of {U,Other} types +template <typename T, typename U, typename... 
Other> constexpr bool one_of_types() +{ + return std::is_same<T, U>::value || one_of_types<T, Other...>(); +} + +/** + * Fills activation min and max parameters depending on given data type and activation + * + * T is a template parameter, so after optimization this code left with only required if case + * + * @tparam T data type of arithmetic operation output tensor + * @param params tflite params to fill + * @param activation luci_interpreter::Activation of arithmetic operation + */ +template <typename T> +void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act) +{ + static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype"); + + if (std::is_same<T, float>::value) + calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max); + if (std::is_same<T, int32_t>::value) + calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max); + else + calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max); +} + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of its exponent. +// +// Handles an arbitrary positive multiplier. The 'shift' output-value is +// basically the 'floating-point exponent' of the multiplier: +// Negative for a right-shift (when the multiplier is <1), positive for a +// left-shift (when the multiplier is >1) +void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); + +// Decompose a double multiplier into a Q0.31 int32 representation of its +// significand, and shift representation of NEGATIVE its exponent --- +// this is intended as a RIGHT-shift. +// +// Restricted to the case where the multiplier < 1 (and non-negative). 
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift); + +Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape); + +inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, + float output_scale) +{ + const double input_product_scale = static_cast<double>(input_scale * filter_scale); + LUCI_INTERPRETER_CHECK(input_product_scale >= 0); + return input_product_scale / static_cast<double>(output_scale); +} + +// TODO rename getQuantizedConvolutionMultiplers to something more general +// it is used for non conv operators too +inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale, + const std::vector<float> &filter_scale, + float output_scale) +{ + std::vector<double> effective_output_scales; + size_t n = filter_scale.size(); + effective_output_scales.reserve(n); + for (size_t i = 0; i < n; ++i) + { + effective_output_scales.push_back( + getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale)); + } + return effective_output_scales; +} + +struct ChannelQuantMultipliers +{ + int shift; + int32_t multiplier; + ChannelQuantMultipliers() = default; +}; + +inline std::vector<ChannelQuantMultipliers> +quantizeMultipliers(const std::vector<double> &effective_scale) +{ + size_t n = effective_scale.size(); + std::vector<ChannelQuantMultipliers> params(n); + for (size_t i = 0; i < n; ++i) + { + quantizeMultiplier(effective_scale[i], ¶ms[i].multiplier, ¶ms[i].shift); + } + return params; +} + +// Helper wrapper to hide broadcast logic +template <typename T> class BroadcastableWrapper +{ +public: + BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 
0 : 1) {} + + T operator[](int idx) { return _v[idx * _stride]; } + +private: + const std::vector<T> &_v; + int _stride; +}; + +inline tflite::RuntimeShape getTensorShape(const Tensor *tensor) +{ + if (tensor == nullptr) + return tflite::RuntimeShape(); + + const Shape &shape = tensor->shape(); + tflite::RuntimeShape runtime_shape(shape.num_dims()); + for (int i = 0; i < shape.num_dims(); ++i) + { + runtime_shape.SetDim(i, shape.dim(i)); + } + return runtime_shape; +} + +template <typename T> const T *getTensorData(const Tensor *tensor) +{ + return tensor != nullptr ? tensor->data<T>() : nullptr; +} + +template <typename T> T *getTensorData(Tensor *tensor) +{ + return tensor != nullptr ? tensor->data<T>() : nullptr; +} + +// A list of tensors in a format that can be used by kernels like split and +// concatenation. +template <typename T, bool is_const> class VectorOfTensors +{ +public: + using ElementT = typename std::conditional<is_const, const T, T>::type; + using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type; + + // Build with the tensors in 'tensor_list'. + explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list) + { + const int num_tensors = tensor_list.size(); + + all_data_.reserve(num_tensors); + all_shape_.reserve(num_tensors); + all_shape_ptr_.reserve(num_tensors); + + for (TensorT *tensor : tensor_list) + { + all_data_.push_back(getTensorData<T>(tensor)); + all_shape_.push_back(getTensorShape(tensor)); + } + + // Taking the pointer from inside a std::vector is only OK if the vector is + // never modified, so we populate all_shape in the previous loop and then we + // are free to grab iterators here. + for (tflite::RuntimeShape &shape : all_shape_) + { + all_shape_ptr_.push_back(&shape); + } + } + // Return a pointer to the data pointers of all tensors in the list. For + // example: + // float* const* f = v.data(); + // f[0][1] is the second element of the first tensor. 
+ ElementT *const *data() const { return all_data_.data(); } + + // Return a pointer the shape pointers of all tensors in the list. For + // example: + // const RuntimeShape* const* d = v.dims(); + // dims[1] are the dimensions of the second tensor in the list. + const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); } + +private: + std::vector<ElementT *> all_data_; + std::vector<tflite::RuntimeShape> all_shape_; + std::vector<tflite::RuntimeShape *> all_shape_ptr_; +}; + +// A list of quantized tensors in a format that can be used by kernels like +// split and concatenation. +template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const> +{ +public: + using typename VectorOfTensors<uint8_t, is_const>::TensorT; + + // Build with the tensors in 'tensor_list'. + explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list) + : VectorOfTensors<uint8_t, is_const>(tensor_list) + { + for (TensorT *tensor : tensor_list) + { + zero_point_.push_back(tensor->zero_point()); + scale_.push_back(tensor->scale()); + } + } + + const float *scale() const { return scale_.data(); } + const int32_t *zero_point() const { return zero_point_.data(); } + +private: + std::vector<int32_t> zero_point_; + std::vector<float> scale_; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_UTILS_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp new file mode 100644 index 000000000..153bd1a99 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/While.h" +#include "kernels/Utils.h" + +#include <cstring> + +namespace luci_interpreter +{ +namespace kernels +{ + +namespace +{ + +void copy(const std::vector<const Tensor *> &src, const std::vector<Tensor *> &dst) +{ + for (size_t i = 0; i < src.size(); ++i) + { + LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type()); + dst[i]->resize(src[i]->shape()); + + const int32_t num_elements = src[i]->shape().num_elements(); + const std::size_t element_size = getDataTypeSize(src[i]->element_type()); + std::memcpy(dst[i]->data<void>(), src[i]->data<void>(), num_elements * element_size); + } +} + +void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst) +{ + std::vector<const Tensor *> const_src; + for (const auto &t : src) + const_src.push_back(t); + copy(const_src, dst); +} + +// TODO: Think about how allocate memory for output in main graph +void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph) +{ + for (auto tensor : tensors) + run_graph->configureAllocations(tensor); +} + +} // namespace + +While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, + RuntimeGraph *cond_graph, RuntimeGraph *body_graph) + : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph) +{ +} + +void While::configure() +{ + LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size()); + LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getOutputTensors().size()); + 
LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size()); + + LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size()); + + const auto &cond_outputs = _cond_graph->getOutputTensors(); + LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1) + LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL); +} + +/** + * @note Dynamic shape such as {1, 0, 8} may fail in tensor->data() + */ +void While::execute() const +{ + const auto &cond_inputs = _cond_graph->getInputTensors(); + const auto &cond_outputs = _cond_graph->getOutputTensors(); + + configureTensorsAllocations(cond_inputs, _cond_graph); + + copy(getInputTensors(), cond_inputs); + + const auto &body_inputs = _body_graph->getInputTensors(); + const auto &body_outputs = _body_graph->getOutputTensors(); + + configureTensorsAllocations(body_inputs, _body_graph); + + while (true) + { + _cond_graph->execute(); + + bool cond_value = cond_outputs[0]->data<bool>()[0]; + if (!cond_value) + break; + + copy(cond_inputs, body_inputs); + + _body_graph->execute(); + + copy(body_outputs, cond_inputs); + } + + copy(cond_inputs, getOutputTensors()); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.h b/compiler/luci-micro/luci-interpreter/src/kernels/While.h new file mode 100644 index 000000000..f758df3f3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H +#define LUCI_INTERPRETER_KERNELS_WHILE_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class While : public Kernel +{ +public: + While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, RuntimeGraph *cond_graph, + RuntimeGraph *body_graph); + + const Tensor *input(int index) const { return _inputs[index]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + RuntimeGraph *const _cond_graph = nullptr; + RuntimeGraph *const _body_graph = nullptr; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_WHILE_H diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp new file mode 100644 index 000000000..cb8f89130 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/RuntimeModule.h" +#include "kernels/Add.h" +#include "kernels/Less.h" +#include "kernels/While.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond, + IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input = + graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + Tensor *output = + graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input}); + graph->setOutputTensors({output}); + + graph->addKernel(std::make_unique<Less>(input, input_cond, output)); + + return graph; +} + +RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add, + IMemoryManager *memory_manager) +{ + RuntimeGraph *graph = module->addGraph(memory_manager); + Tensor *input = + graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + Tensor *output = + graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + + graph->setInputTensors({input}); + graph->setOutputTensors({output}); + + AddParams params{}; + params.activation 
= Activation::NONE; + graph->addKernel(std::make_unique<Add>(input, input_add, output, params)); + + return graph; +} + +TEST(WhileTest, FloatLoop10) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get()); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get()); + Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get()); + + RuntimeModule module(nullptr); + RuntimeGraph *cond_graph = + buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get()); + RuntimeGraph *body_graph = + buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get()); + + While kernel({&input}, {&output}, cond_graph, body_graph); + kernel.configure(); + memory_manager->allocate_memory(output); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt new file mode 100644 index 000000000..292771592 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt @@ -0,0 +1,39 @@ +set(SOURCES + GraphLoader.h + GraphLoader.cpp + KernelBuilderHelper.h + KernelBuilderHelper.cpp + KernelBuilder.h + KernelBuilder.cpp + ModuleLoader.h + ModuleLoader.cpp + RuntimeToIR.h + nodes/Builders.h) + +# include kernel specific builders +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES "nodes/${NODE}.cpp") +endmacro(REGISTER_KERNEL) +include(${KERNEL_REGISTER_FILE}) + +add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES}) +if (NOT NNCC_LIBRARY_NO_PIC) + set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif(NOT 
NNCC_LIBRARY_NO_PIC) +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}") +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") + +target_link_libraries(${LUCI_INTERPRETER_LOADER} + PUBLIC luci_lang ${LUCI_INTERPRETER_CORE} + PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +set(TEST_SOURCES KernelBuilder.test.cpp) + +GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER}) diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp new file mode 100644 index 000000000..40207090b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "loader/GraphLoader.h" + +#include "loader/KernelBuilder.h" + +#include <luci/Plan/CircleNodeExecutionPlan.h> +#include <loco/IR/Algorithm.h> + +namespace luci_interpreter +{ +namespace +{ + +template <typename NodeT> Shape getNodeShape(const NodeT *node) +{ + Shape shape(node->rank()); + for (uint32_t i = 0; i < node->rank(); ++i) + { + shape.dim(i) = node->dim(i).value(); + } + return shape; +} + +template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size) +{ + const size_t element_size = getDataTypeSize(DT); + const int32_t num_elements = node->size<DT>(); + + *data_size = num_elements * element_size; + if (*data_size > 0) + { + // FIXME There is no good way to get the pointer to the data currently. + return &node->at<DT>(0); + } + return nullptr; +} + +const void *getNodeData(const luci::CircleConst *node, size_t *data_size) +{ + switch (node->dtype()) + { + case DataType::U8: + return getNodeDataImpl<DataType::U8>(node, data_size); + case DataType::FLOAT32: + return getNodeDataImpl<DataType::FLOAT32>(node, data_size); + case DataType::S8: + return getNodeDataImpl<DataType::S8>(node, data_size); + case DataType::S16: + return getNodeDataImpl<DataType::S16>(node, data_size); + case DataType::S32: + return getNodeDataImpl<DataType::S32>(node, data_size); + case DataType::S64: + return getNodeDataImpl<DataType::S64>(node, data_size); + case DataType::BOOL: + return getNodeDataImpl<DataType::BOOL>(node, data_size); + default: + throw std::runtime_error("Unsupported type."); + } +} + +const void *getNodeData(const luci::CircleCustom *node, size_t *data_size) +{ + if (node->custom_code() != "CircleReferencingConst") + return nullptr; + + // helper struct which describes data loaded to custom_options of CircleReferencingConst node + // TODO move this struct to header + struct ConstDataReference + { + const uint8_t *data = nullptr; + uint32_t size = 0; + }; + + const auto &custom_options = 
node->custom_options(); + const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data()); + + *data_size = const_data_ref.size; + return const_data_ref.data; +} + +bool isExecutableNode(const luci::CircleNode *node) +{ + switch (node->opcode()) + { + // These nodes denote inputs / outputs of a graph. + case luci::CircleOpcode::CIRCLECONST: + case luci::CircleOpcode::CIRCLEINPUT: + case luci::CircleOpcode::CIRCLEOUTPUT: + case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE: + // The following nodes denote outputs of multiple-output nodes. + case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEIFOUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT: + case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT: + case luci::CircleOpcode::CIRCLESPLITOUT: + case luci::CircleOpcode::CIRCLESPLITVOUT: + case luci::CircleOpcode::CIRCLETOPKV2OUT: + case luci::CircleOpcode::CIRCLEUNIQUEOUT: + case luci::CircleOpcode::CIRCLEUNPACKOUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + case luci::CircleOpcode::CIRCLEWHILEOUT: + return false; + // Custom nodes may be executable and non-executable + case luci::CircleOpcode::CUSTOM: + { + auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node); + + // TODO handle more non-executable Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return false; + + return true; + } + default: + return true; + } +} + +bool isTensorProducingNode(const luci::CircleNode *node) +{ + switch (node->opcode()) + { + // Output nodes do not produce tensors. + case luci::CircleOpcode::CIRCLEOUTPUT: + // The following nodes are multiple-output nodes. They do not produce tensors, the tensors + // are produced by the corresponding *Out nodes instead. 
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM: + case luci::CircleOpcode::CUSTOM: + case luci::CircleOpcode::IF: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4: + case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5: + case luci::CircleOpcode::SPLIT: + case luci::CircleOpcode::SPLIT_V: + case luci::CircleOpcode::TOPK_V2: + case luci::CircleOpcode::UNIQUE: + case luci::CircleOpcode::UNPACK: + case luci::CircleOpcode::WHILE: + return false; + default: + return true; + } +} + +bool isSupportedCustomNode(const luci::CircleNode *node) +{ + const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node); + + // TODO handle more Custom ops here + if (custom_node->custom_code() == "CircleReferencingConst") + return true; + + return false; +} + +} // namespace + +GraphLoader::GraphLoader( + const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager) + : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir), + _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor), + _memory_manager(memory_manager) +{ +} + +void GraphLoader::loadTensors() +{ + for (uint32_t i = 0; i < _graph->nodes()->size(); ++i) + { + const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i)); + + if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node)) + throw std::runtime_error("Unsupported Custom operator. " + node->name()); + + if (!isTensorProducingNode(node)) + continue; + + // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will + // be inferred. 
+ Shape shape{}; + switch (node->opcode()) + { + case luci::CircleOpcode::CIRCLECONST: + case luci::CircleOpcode::CIRCLECUSTOMOUT: + case luci::CircleOpcode::CIRCLEINPUT: + case luci::CircleOpcode::CIRCLEVARIABLE: + shape = getNodeShape(node); + break; + default: + break; + } + + AffineQuantization quantization; + if (node->quantparam() != nullptr) + { + const luci::CircleQuantParam *params = node->quantparam(); + assert(params->scale.size() == params->zerop.size()); + quantization.scale.assign(params->scale.cbegin(), params->scale.cend()); + quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend()); + quantization.quantized_dimension = params->quantized_dimension; + } + + auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization), + node->name()); + + // If node has execution plan then read memory offsets for nodes + // from the beginning of shared memory buffer. Used in Static Memory Manager. + if (luci::has_execution_plan(node)) + { + auto execution_plan = luci::get_execution_plan(node); + assert(!execution_plan.offsets().empty()); + tensor->set_offset(execution_plan.offsets().front()); + } + + if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node)) + { + size_t data_size{}; + const void *const_data = getNodeData(const_node, &data_size); + if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); + tensor->writeData(const_data, data_size); + } + } + else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node)) + { + const auto *custom_node = + loco::must_cast<const luci::CircleCustom *>(custom_out_node->input()); + + if (custom_node->custom_code() == "CircleReferencingConst") + { + size_t data_size{}; + const void *const_data = getNodeData(custom_node, &data_size); + if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); + tensor->writeData(const_data, data_size); + } + } + } + + _node_to_tensor.emplace(node, 
tensor.get()); + _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node); + + _runtime_graph->addTensor(std::move(tensor)); + } +} + +void GraphLoader::initInputOutputTensors() const +{ + auto input_nodes = loco::input_nodes(_graph); + std::vector<Tensor *> input_tensors(input_nodes.size()); + for (size_t i = 0; i < input_nodes.size(); ++i) + { + input_tensors[i] = _node_to_tensor.at(input_nodes[i]); + _memory_manager->allocate_memory(*input_tensors[i]); + } + _runtime_graph->setInputTensors(input_tensors); + + auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph)); + std::vector<Tensor *> output_tensors(output_nodes.size()); + for (size_t i = 0; i < output_nodes.size(); ++i) + { + const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]); + output_tensors[i] = _node_to_tensor.at(node->from()); + } + _runtime_graph->setOutputTensors(output_tensors); +} + +void GraphLoader::loadOperators() +{ + KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor); + + // Create kernels for executable nodes. This has to be done in execution order. + auto graph = const_cast<loco::Graph *>(_graph); + + auto const graph_nodes = loco::all_nodes(graph); + + // Checking for execution plan in node annotations. + bool has_execution_annotation = true; + auto const checking_exec_plan = [&has_execution_annotation](auto const node) { + const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node); + if (!luci::has_execution_plan(circle_node)) + has_execution_annotation = false; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan); + + if (has_execution_annotation) + { + // Build ordered_nodes vector that stores the order of execution of graph nodes. 
+ std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size()); + + auto const filler = [&ordered_nodes](auto const node) { + const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node); + auto const position = luci::get_execution_plan(circle_node).order_in_plan(); + ordered_nodes.at(position) = circle_node; + }; + std::for_each(begin(graph_nodes), end(graph_nodes), filler); + + for (auto node : ordered_nodes) + { + if (isExecutableNode(node)) + { + std::unique_ptr<Kernel> kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } + } + } + else + { + // If it is impossible to build the execution order plan, + // then we use the default postorder_traversal approach. + for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph))) + { + const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node); + if (isExecutableNode(node)) + { + std::unique_ptr<Kernel> kernel = kernel_builder.build(node); + _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node); + _runtime_graph->addKernel(std::move(kernel)); + } + } + } +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h new file mode 100644 index 000000000..fe066ecf8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H +#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H + +#include "core/RuntimeGraph.h" +#include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" + +#include <loco/IR/Graph.h> + +#include <unordered_map> + +namespace luci_interpreter +{ + +class GraphLoader +{ +public: + GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager); + + void loadTensors(); + void initInputOutputTensors() const; + void loadOperators(); + +private: + const loco::Graph *_graph; + RuntimeGraph *_runtime_graph; + RuntimeToIR &_runtime_to_ir; + IMemoryManager *_memory_manager; + + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph; + std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp new file mode 100644 index 000000000..8483a9a3d --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loader/KernelBuilder.h" +#include "loader/nodes/Builders.h" + +#include <stdexcept> + +namespace luci_interpreter +{ + +#define CIRCLE_NODE(OPCODE, CLASS) CLASS, +#define CIRCLE_VNODE(OPCODE, CLASS) CLASS, + +// This enum is auxiliary. +// It is duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE, +// because list of target operators is in format of CLASS names +enum class BuilderId +{ +#include <luci/IR/CircleNodes.lst> + Size // casts to count of values in BuilderId enum +}; + +#undef CIRCLE_VNODE +#undef CIRCLE_NODE + +/** + * @brief Registry of kernel builders + * + * This class contains mapping from Opcodes to kernel builder functions + */ + +class KernelBuilderRegistry +{ +public: + using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *, + KernelBuilderHelper &); + + KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr) + { +#define REGISTER_KERNEL(name) \ + register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name); + +#include "KernelsToBuild.lst" + +#undef REGISTER_KERNEL + } + + KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const + { + return _operator_builders.at(size_t(opcode)); + } + +private: + std::vector<KernelBuilderFunc *> _operator_builders; + + void register_kernel_builder(BuilderId id, KernelBuilderFunc *func) + { + // Using BuilderId is a 
duplicate of luci::CirclreOpcode, + // size_t(id) is equal to size_t(corresponding operation opcode). + assert(size_t(id) < _operator_builders.size()); + _operator_builders[size_t(id)] = func; + } +}; + +KernelBuilder::KernelBuilder( + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) +{ + _builder_registry = std::make_unique<KernelBuilderRegistry>(); +} + +KernelBuilder::~KernelBuilder() +{ + // Need to define in this CPP to hide KernelBuilderRegistry internals. + // This destructor deletes _builder_registry +} + +std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node) +{ + auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode()); + if (specific_builder != nullptr) + return specific_builder(node, *this); + + std::string msg = "Unsupported operator: "; + msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name()); + throw std::invalid_argument(msg.c_str()); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h new file mode 100644 index 000000000..b1f383394 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H +#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H + +#include "loader/KernelBuilderHelper.h" + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +#include <luci/IR/CircleNodeVisitor.h> + +#include <memory> +#include <unordered_map> + +namespace luci_interpreter +{ + +class KernelBuilderRegistry; + +class KernelBuilder : public KernelBuilderHelper +{ +public: + KernelBuilder( + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); + + ~KernelBuilder(); + + std::unique_ptr<Kernel> build(const luci::CircleNode *node); + +private: + std::unique_ptr<KernelBuilderRegistry> _builder_registry; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp new file mode 100644 index 000000000..b221b6921 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp @@ -0,0 +1,1376 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "loader/GraphLoader.h" +#include "loader/KernelBuilder.h" +#include "luci_interpreter/SimpleMemoryManager.h" + +#include <kernels/Add.h> +#include <kernels/ArgMax.h> +#include <kernels/AveragePool2D.h> +#include <kernels/BatchMatMul.h> +#include <kernels/Cast.h> +#include <kernels/Concatenation.h> +#include <kernels/Conv2D.h> +#include <kernels/DepthToSpace.h> +#include <kernels/DepthwiseConv2D.h> +#include <kernels/Div.h> +#include <kernels/Elu.h> +#include <kernels/Exp.h> +#include <kernels/Floor.h> +#include <kernels/FloorDiv.h> +#include <kernels/Equal.h> +#include <kernels/FullyConnected.h> +#include <kernels/Greater.h> +#include <kernels/GreaterEqual.h> +#include <kernels/InstanceNorm.h> +#include <kernels/L2Normalize.h> +#include <kernels/L2Pool2D.h> +#include <kernels/LeakyRelu.h> +#include <kernels/Less.h> +#include <kernels/LessEqual.h> +#include <kernels/LocalResponseNormalization.h> +#include <kernels/LogicalAnd.h> +#include <kernels/LogicalNot.h> +#include <kernels/LogicalOr.h> +#include <kernels/Logistic.h> +#include <kernels/LogSoftmax.h> +#include <kernels/Maximum.h> +#include <kernels/MaxPool2D.h> +#include <kernels/Mean.h> +#include <kernels/Minimum.h> +#include <kernels/Mul.h> +#include <kernels/Neg.h> +#include <kernels/NotEqual.h> +#include <kernels/OneHot.h> +#include <kernels/Pad.h> +#include <kernels/PadV2.h> +#include <kernels/Pow.h> +#include <kernels/PRelu.h> +#include <kernels/Relu.h> +#include <kernels/Relu6.h> +#include <kernels/Reshape.h> +#include <kernels/ResizeBilinear.h> +#include <kernels/ResizeNearestNeighbor.h> +#include <kernels/ReverseV2.h> +#include <kernels/Rsqrt.h> +#include <kernels/Slice.h> +#include <kernels/Softmax.h> +#include <kernels/SpaceToDepth.h> +#include <kernels/Split.h> +#include <kernels/SplitV.h> +#include <kernels/Sqrt.h> +#include <kernels/SquaredDifference.h> +#include <kernels/Squeeze.h> +#include <kernels/StridedSlice.h> +#include <kernels/Sub.h> +#include <kernels/Tanh.h> +#include 
<kernels/Transpose.h> +#include <kernels/TransposeConv.h> +#include <kernels/Unpack.h> + +#include <gmock/gmock.h> + +namespace luci_interpreter +{ +namespace +{ + +using namespace testing; + +class KernelBuilderTest : public Test +{ +protected: + luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); } + void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; + + template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args) + { + auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...); + // The actual type does not matter for the purpose of the tests. + // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry + // actual output types). + node->dtype(loco::DataType::FLOAT32); + return node; + } + + template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index) + { + auto *node_out = createNode<NodeOutT>(); + node_out->input(node); + node_out->index(index); + return node_out; + } + + template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op) + { + std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph; + + RuntimeGraph runtime_graph(nullptr, _memory_manager.get()); + graph_to_runtime_graph[&_graph] = &runtime_graph; + RuntimeToIR runtime_to_ir; + GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph, + _node_to_tensor, _memory_manager.get()); + graph_loader.loadTensors(); + + KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor); + + auto kernel = kernel_builder.build(op); + return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release())); + } + + void checkTensor(const Tensor *tensor, const loco::Node *node) + { + EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node))); + } + +private: + loco::Graph _graph; + std::unordered_map<const loco::Node *, Tensor *> 
_node_to_tensor; +}; + +TEST_F(KernelBuilderTest, Add) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleAdd>(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Add>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, ArgMax) +{ + auto *input = createInputNode(); + auto *axis = createInputNode(); + + auto *op = createNode<luci::CircleArgMax>(); + op->input(input); + op->dimension(axis); + + op->output_type(loco::DataType::FLOAT32); + + auto kernel = buildKernel<kernels::ArgMax>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->axis(), axis); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().output_type, Eq(op->output_type())); +} + +TEST_F(KernelBuilderTest, AveragePool2D) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleAveragePool2D>(); + op->value(input); + + op->padding(luci::Padding::SAME); + op->filter()->h(11); + op->filter()->w(13); + op->stride()->h(17); + op->stride()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::AveragePool2D>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h())); + EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, 
Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, BatchMatMul) +{ + auto *lhs = createInputNode(); + auto *rhs = createInputNode(); + + auto *op = createNode<luci::CircleBatchMatMul>(); + op->x(lhs); + op->y(rhs); + op->adj_x(false); + op->adj_y(false); + + auto kernel = buildKernel<kernels::BatchMatMul>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), lhs); + checkTensor(kernel->y(), rhs); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x())); + EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y())); +} + +TEST_F(KernelBuilderTest, Cast) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleCast>(); + op->x(input); + + auto kernel = buildKernel<kernels::Cast>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Concatenation) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleConcatenation>(2); + op->values(0, input1); + op->values(1, input2); + op->axis(11); + + auto kernel = buildKernel<kernels::Concatenation>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(0), input1); + checkTensor(kernel->input(1), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Conv2D) +{ + auto *input = createInputNode(); + auto *filter = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode<luci::CircleConv2D>(); + op->input(input); + op->filter(filter); + op->bias(bias); + + op->padding(luci::Padding::SAME); + op->stride()->h(11); + op->stride()->w(13); + op->dilation()->h(17); + op->dilation()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Conv2D>(op); + ASSERT_THAT(kernel, 
NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->filter(), filter); + checkTensor(kernel->bias(), bias); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h())); + EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, DepthToSpace) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleDepthToSpace>(); + op->input(input); + + op->block_size(11); + + auto kernel = buildKernel<kernels::DepthToSpace>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().block_size, Eq(op->block_size())); +} + +TEST_F(KernelBuilderTest, DepthwiseConv2D) +{ + auto *input = createInputNode(); + auto *filter = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode<luci::CircleDepthwiseConv2D>(); + op->input(input); + op->filter(filter); + op->bias(bias); + + op->padding(luci::Padding::SAME); + op->depthMultiplier(11); + op->stride()->h(13); + op->stride()->w(17); + op->dilation()->h(19); + op->dilation()->w(23); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::DepthwiseConv2D>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->filter(), filter); + checkTensor(kernel->bias(), bias); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + 
EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h())); + EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Div) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleDiv>(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Div>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Elu) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleElu>(); + op->features(input); + + auto kernel = buildKernel<kernels::Elu>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Exp) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleExp>(); + op->x(input); + + auto kernel = buildKernel<kernels::Exp>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Floor) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleFloor>(); + op->x(input); + + auto kernel = buildKernel<kernels::Floor>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, FloorDiv) +{ + auto *x = createInputNode(); + auto *y = createInputNode(); + + auto *op = createNode<luci::CircleFloorDiv>(); + op->x(x); + op->y(y); + + auto kernel = buildKernel<kernels::FloorDiv>(op); + 
ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x); + checkTensor(kernel->y(), y); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Equal) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::Equal>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, FullyConnected) +{ + auto *input = createInputNode(); + auto *weights = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode<luci::CircleFullyConnected>(); + op->input(input); + op->weights(weights); + op->bias(bias); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::FullyConnected>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->weights(), weights); + checkTensor(kernel->bias(), bias); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Greater) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleGreater>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::Greater>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, GreaterEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleGreaterEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::GreaterEqual>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + 
checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, InstanceNorm) +{ + auto *input = createInputNode(); + auto *gamma = createInputNode(); + auto *beta = createInputNode(); + + auto *op = createNode<luci::CircleInstanceNorm>(); + op->input(input); + op->gamma(gamma); + op->beta(beta); + + op->epsilon(1e-05); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::InstanceNorm>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->gamma(), gamma); + checkTensor(kernel->beta(), beta); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, L2Normalize) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleL2Normalize>(); + op->x(input); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::L2Normalize>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, L2Pool2D) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleL2Pool2D>(); + op->value(input); + + op->padding(luci::Padding::SAME); + op->filter()->h(11); + op->filter()->w(13); + op->stride()->h(17); + op->stride()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::L2Pool2D>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h())); + EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); 
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, LeakyRelu) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLeakyRelu>(); + op->features(input); + + op->alpha(11.0f); + + auto kernel = buildKernel<kernels::LeakyRelu>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().alpha, Eq(op->alpha())); +} + +TEST_F(KernelBuilderTest, Less) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleLess>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::Less>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LessEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleLessEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::LessEqual>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LocalResponseNormalization) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLocalResponseNormalization>(); + op->input(input); + + op->radius(11); + op->bias(13.0f); + op->alpha(15.0f); + op->beta(17.0f); + + auto kernel = buildKernel<kernels::LocalResponseNormalization>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().radius, Eq(op->radius())); + EXPECT_THAT(kernel->params().bias, Eq(op->bias())); + EXPECT_THAT(kernel->params().alpha, Eq(op->alpha())); + 
EXPECT_THAT(kernel->params().beta, Eq(op->beta())); +} + +TEST_F(KernelBuilderTest, LogicalAnd) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleLogicalAnd>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::LogicalAnd>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogicalNot) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLogicalNot>(); + op->x(input); + + auto kernel = buildKernel<kernels::LogicalNot>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogicalOr) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleLogicalOr>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::LogicalOr>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Logistic) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLogistic>(); + op->x(input); + + auto kernel = buildKernel<kernels::Logistic>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, LogSoftmax) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleLogSoftmax>(); + op->logits(input); + + auto kernel = buildKernel<kernels::LogSoftmax>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Maximum) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = 
createNode<luci::CircleMaximum>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::Maximum>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, MaxPool2D) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleMaxPool2D>(); + op->value(input); + + op->padding(luci::Padding::SAME); + op->filter()->h(11); + op->filter()->w(13); + op->stride()->h(17); + op->stride()->w(19); + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::MaxPool2D>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h())); + EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Mean) +{ + auto *input = createInputNode(); + auto *axes = createInputNode(); + + auto *op = createNode<luci::CircleMean>(); + op->input(input); + op->reduction_indices(axes); + + op->keep_dims(true); + + auto kernel = buildKernel<kernels::Mean>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->axes(), axes); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims())); +} + +TEST_F(KernelBuilderTest, Minimum) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleMinimum>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::Minimum>(op); + ASSERT_THAT(kernel, NotNull()); + + 
checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Mul) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleMul>(); + op->x(input1); + op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Mul>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Neg) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleNeg>(); + op->x(input); + + auto kernel = buildKernel<kernels::Neg>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, NotEqual) +{ + auto *x_input = createInputNode(); + auto *y_input = createInputNode(); + + auto *op = createNode<luci::CircleNotEqual>(); + op->x(x_input); + op->y(y_input); + + auto kernel = buildKernel<kernels::NotEqual>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->x(), x_input); + checkTensor(kernel->y(), y_input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, OneHot) +{ + auto *indices = createInputNode(); + auto *depth = createInputNode(); + auto *on_value = createInputNode(); + auto *off_value = createInputNode(); + auto axis = 1; + + auto *op = createNode<luci::CircleOneHot>(); + op->indices(indices); + op->depth(depth); + op->on_value(on_value); + op->off_value(off_value); + op->axis(axis); + + auto kernel = buildKernel<kernels::OneHot>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->indices(), indices); + checkTensor(kernel->depth(), depth); + checkTensor(kernel->on_value(), on_value); + checkTensor(kernel->off_value(), off_value); + 
EXPECT_THAT(kernel->params().axis, Eq(op->axis())); +} + +TEST_F(KernelBuilderTest, Pad) +{ + auto *input = createInputNode(); + auto *paddings = createInputNode(); + + auto *op = createNode<luci::CirclePad>(); + op->input(input); + op->paddings(paddings); + + auto kernel = buildKernel<kernels::Pad>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->paddings(), paddings); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, PadV2) +{ + auto *input = createInputNode(); + auto *paddings = createInputNode(); + auto *constant_values = createInputNode(); + + auto *op = createNode<luci::CirclePadV2>(); + op->input(input); + op->paddings(paddings); + op->constant_values(constant_values); + + auto kernel = buildKernel<kernels::PadV2>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->paddings(), paddings); + checkTensor(kernel->constant_values(), constant_values); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Pow) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CirclePow>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::Pow>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, PRelu) +{ + auto *input = createInputNode(); + auto *alpha = createInputNode(); + + auto *op = createNode<luci::CirclePRelu>(); + op->input(input); + op->alpha(alpha); + + auto kernel = buildKernel<kernels::PRelu>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->alpha(), alpha); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Relu) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleRelu>(); + op->features(input); + + auto kernel = 
buildKernel<kernels::Relu>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Relu6) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleRelu6>(); + op->features(input); + + auto kernel = buildKernel<kernels::Relu6>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Reshape) +{ + auto *input = createInputNode(); + auto *shape = createInputNode(); + + auto *op = createNode<luci::CircleReshape>(); + op->tensor(input); + op->shape(shape); + + auto kernel = buildKernel<kernels::Reshape>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->shape(), shape); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, ResizeBilinear) +{ + auto *input = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode<luci::CircleResizeBilinear>(); + op->input(input); + op->size(size); + op->align_corners(true); + op->half_pixel_centers(true); + + auto kernel = buildKernel<kernels::ResizeBilinear>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners())); + EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers())); +} + +TEST_F(KernelBuilderTest, ResizeNearestNeighbor) +{ + auto *input = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode<luci::CircleResizeNearestNeighbor>(); + op->input(input); + op->size(size); + op->align_corners(true); + + auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); + 
EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners())); + // TODO currently half_pixel_centers are not implemented on CircleResizeNearestNeighbor + // after adding, need to be updated. +} + +TEST_F(KernelBuilderTest, ReverseV2) +{ + auto *input = createInputNode(); + auto *axes = createInputNode(); + + auto *op = createNode<luci::CircleReverseV2>(); + op->tensor(input); + op->axis(axes); + + auto kernel = buildKernel<kernels::ReverseV2>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->axes(), axes); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Rsqrt) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleRsqrt>(); + op->x(input); + + auto kernel = buildKernel<kernels::Rsqrt>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Slice) +{ + auto *input = createInputNode(); + auto *begin = createInputNode(); + auto *size = createInputNode(); + + auto *op = createNode<luci::CircleSlice>(); + op->input(input); + op->begin(begin); + op->size(size); + + auto kernel = buildKernel<kernels::Slice>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->begin(), begin); + checkTensor(kernel->size(), size); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Softmax) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleSoftmax>(); + op->logits(input); + + op->beta(11.0f); + + auto kernel = buildKernel<kernels::Softmax>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().beta, Eq(op->beta())); +} + +TEST_F(KernelBuilderTest, SpaceToDepth) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleSpaceToDepth>(); + op->input(input); + + op->block_size(11); + + auto kernel = 
buildKernel<kernels::SpaceToDepth>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().block_size, op->block_size()); +} + +TEST_F(KernelBuilderTest, Split) +{ + auto *axis = createInputNode(); + auto *input = createInputNode(); + auto *op = createNode<luci::CircleSplit>(); + auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0); + auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1); + + op->split_dim(axis); + op->input(input); + + op->num_split(2); + + auto kernel = buildKernel<kernels::Split>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->axis(), axis); + checkTensor(kernel->input(), input); + checkTensor(kernel->output(0), output1); + checkTensor(kernel->output(1), output2); +} + +TEST_F(KernelBuilderTest, SplitV) +{ + auto *input = createInputNode(); + auto *size_splits = createInputNode(); + auto *axis = createInputNode(); + auto *op = createNode<luci::CircleSplitV>(); + auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0); + auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1); + + op->input(input); + op->size_splits(size_splits); + op->split_dim(axis); + + op->num_split(2); + + auto kernel = buildKernel<kernels::SplitV>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size_splits(), size_splits); + checkTensor(kernel->axis(), axis); + checkTensor(kernel->output(0), output0); + checkTensor(kernel->output(1), output1); +} + +TEST_F(KernelBuilderTest, Sqrt) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleSqrt>(); + op->x(input); + + auto kernel = buildKernel<kernels::Sqrt>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, SquaredDifference) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = 
createNode<luci::CircleSquaredDifference>(); + op->x(input1); + op->y(input2); + + auto kernel = buildKernel<kernels::SquaredDifference>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Squeeze) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleSqueeze>(); + op->input(input); + + op->squeeze_dims({11, 13}); + + auto kernel = buildKernel<kernels::Squeeze>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims())); +} + +TEST_F(KernelBuilderTest, StridedSlice) +{ + auto *input = createInputNode(); + auto *begin = createInputNode(); + auto *end = createInputNode(); + auto *strides = createInputNode(); + + auto *op = createNode<luci::CircleStridedSlice>(); + op->input(input); + op->begin(begin); + op->end(end); + op->strides(strides); + + op->begin_mask(11); + op->ellipsis_mask(13); + op->end_mask(17); + op->new_axis_mask(19); + op->shrink_axis_mask(23); + + auto kernel = buildKernel<kernels::StridedSlice>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->begin(), begin); + checkTensor(kernel->end(), end); + checkTensor(kernel->strides(), strides); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask())); + EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask())); + EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask())); + EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask())); + EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask())); +} + +TEST_F(KernelBuilderTest, Sub) +{ + auto *input1 = createInputNode(); + auto *input2 = createInputNode(); + + auto *op = createNode<luci::CircleSub>(); + op->x(input1); + 
op->y(input2); + + op->fusedActivationFunction(luci::FusedActFunc::RELU); + + auto kernel = buildKernel<kernels::Sub>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input1(), input1); + checkTensor(kernel->input2(), input2); + checkTensor(kernel->output(), op); + EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction())); +} + +TEST_F(KernelBuilderTest, Tanh) +{ + auto *input = createInputNode(); + + auto *op = createNode<luci::CircleTanh>(); + op->x(input); + + auto kernel = buildKernel<kernels::Tanh>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, Transpose) +{ + auto *input = createInputNode(); + auto *perm = createInputNode(); + + auto *op = createNode<luci::CircleTranspose>(); + op->a(input); + op->perm(perm); + + auto kernel = buildKernel<kernels::Transpose>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->perm(), perm); + checkTensor(kernel->output(), op); +} + +TEST_F(KernelBuilderTest, TransposeConv) +{ + auto *output_shape = createInputNode(); + auto *filter = createInputNode(); + auto *input = createInputNode(); + auto *bias = createInputNode(); + + auto *op = createNode<luci::CircleTransposeConv>(); + op->inputSizes(output_shape); + op->filter(filter); + op->outBackprop(input); + op->bias(bias); + + op->padding(luci::Padding::SAME); + op->stride()->h(11); + op->stride()->w(13); + + auto kernel = buildKernel<kernels::TransposeConv>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->output_shape(), output_shape); + checkTensor(kernel->filter(), filter); + checkTensor(kernel->input(), input); + checkTensor(kernel->output(), op); + checkTensor(kernel->bias(), bias); + EXPECT_THAT(kernel->params().padding, Eq(op->padding())); + EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h())); + EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w())); 
+} + +TEST_F(KernelBuilderTest, Unpack) +{ + auto *input = createInputNode(); + auto *op = createNode<luci::CircleUnpack>(); + auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0); + auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1); + + op->value(input); + + op->num(2); + op->axis(11); + + auto kernel = buildKernel<kernels::Unpack>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->output(0), output1); + checkTensor(kernel->output(1), output2); + EXPECT_THAT(kernel->params().axis, Eq(op->axis())); +} + +TEST_F(KernelBuilderTest, NonExisting1_NEG) +{ + auto *op = createNode<luci::CircleConst>(); + ASSERT_ANY_THROW(buildKernel<Kernel>(op)); +} + +TEST_F(KernelBuilderTest, NonExisting2_NEG) +{ + auto *op = createNode<luci::CircleInput>(); + ASSERT_ANY_THROW(buildKernel<Kernel>(op)); +} + +TEST_F(KernelBuilderTest, NonExisting3_NEG) +{ + auto *op = createNode<luci::CircleOutput>(); + ASSERT_ANY_THROW(buildKernel<Kernel>(op)); +} + +} // namespace +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp new file mode 100644 index 000000000..23c96a6db --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loader/KernelBuilderHelper.h" + +#include <luci/IR/Nodes/CircleOutput.h> + +namespace luci_interpreter +{ + +const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const +{ + const Tensor *tensor = _node_to_tensor.at(node); + assert(tensor != nullptr); + return tensor; +} + +const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const +{ + if (dynamic_cast<const luci::CircleOutputExclude *>(node)) + { + return nullptr; + } + return getInputTensor(node); +} + +Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const +{ + Tensor *tensor = _node_to_tensor.at(node); + assert(tensor != nullptr); + return tensor; +} + +std::vector<Tensor *> +KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const +{ + std::vector<Tensor *> tensors; + tensors.reserve(nodes.size()); + for (const loco::Node *node : nodes) + tensors.push_back(getOutputTensor(node)); + return tensors; +} + +RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const +{ + RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); + assert(runtime_graph != nullptr); + return runtime_graph; +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h new file mode 100644 index 000000000..d6fb253b1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H +#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H + +#include "core/Kernel.h" +#include "core/RuntimeGraph.h" + +#include <loco/IR/Graph.h> +#include <loco/IR/Node.h> + +#include <vector> +#include <unordered_map> + +namespace luci_interpreter +{ + +class KernelBuilderHelper +{ +public: + KernelBuilderHelper( + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor) + { + } + +public: + const Tensor *getInputTensor(const loco::Node *node) const; + const Tensor *getOptionalInputTensor(const loco::Node *node) const; + + Tensor *getOutputTensor(const loco::Node *node) const; + std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const; + + RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const; + +public: + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const + { + return _graph_to_runtime_graph; + } + + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const + { + return _node_to_tensor; + } + +private: + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph; + const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; +}; + +template <typename CircleNodeOut> +std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node) +{ + 
std::vector<const CircleNodeOut *> output_nodes; + for (const loco::Node *loco_node : loco::succs(node)) + { + output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node)); + } + std::sort(output_nodes.begin(), output_nodes.end(), + [](const CircleNodeOut *node1, const CircleNodeOut *node2) { + return node1->index() < node2->index(); + }); + return {output_nodes.cbegin(), output_nodes.cend()}; +} + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp new file mode 100644 index 000000000..2f278b087 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ModuleLoader.h" + +#include "GraphLoader.h" + +namespace luci_interpreter +{ + +ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, + RuntimeToIR &runtime_to_ir, + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager) + : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir), + _node_to_tensor(node_to_tensor), _memory_manager(memory_manager) +{ +} + +void ModuleLoader::load() +{ + // Runtime graphs have to be created in advance, because they will be needed during the loading + // process for control flow nodes. + for (size_t i = 0; i < _module->size(); ++i) + { + _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager)); + } + for (size_t i = 0; i < _module->size(); ++i) + { + const loco::Graph *graph = _module->graph(i); + RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); + GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph, + _node_to_tensor, _memory_manager); + loader.loadTensors(); + loader.initInputOutputTensors(); + loader.loadOperators(); + } +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h new file mode 100644 index 000000000..11326a2ee --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H +#define LUCI_INTERPRETER_LOADER_MODULELOADER_H + +#include "core/RuntimeModule.h" +#include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" + +#include <luci/IR/Module.h> + +#include <unordered_map> + +namespace luci_interpreter +{ + +class ModuleLoader +{ +public: + ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, + RuntimeToIR &runtime_to_ir, + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager); + + void load(); + +private: + IMemoryManager *_memory_manager; + const luci::Module *_module; + RuntimeModule *_runtime_module; + RuntimeToIR &_runtime_to_ir; + std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; + std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h new file mode 100644 index 000000000..9ea8b1fa2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H +#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H + +#include "luci_interpreter/core/Tensor.h" + +#include <luci/IR/CircleNode.h> + +#include <unordered_map> + +namespace luci_interpreter +{ + +// Maps runtime entities back to IR entities. It is used to implement observing functionality. +struct RuntimeToIR +{ + std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node; + std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp new file mode 100644 index 000000000..501e84752 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Add.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + AddParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Add>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp new file mode 100644 index 000000000..f3ca55744 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ArgMax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node); + assert(node->arity() == 2); + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->dimension()); + Tensor *output = helper.getOutputTensor(node); + + ArgMaxParams params{}; + params.output_type = node->output_type(); + + return std::make_unique<kernels::ArgMax>(input, axis, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp new file mode 100644 index 000000000..a8135706f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/AveragePool2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp new file mode 100644 index 000000000..9da2f6d93 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchMatMul.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node); + assert(node->arity() == 2); + + const Tensor *lhs = helper.getInputTensor(node->x()); + const Tensor *rhs = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + auto lhs_scratchpad = + std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, ""); + lhs_scratchpad->set_observable(false); + lhs_scratchpad->set_data_buffer(nullptr); + auto rhs_scratchpad = + std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, ""); + rhs_scratchpad->set_observable(false); + rhs_scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current BatchMatMul temporary was found. + if (execution_plan.offsets().size() > 1) + { + assert(execution_plan.offsets().size() == 3); + + // If this is true, then we keep this offset in scratchpad. 
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1)); + rhs_scratchpad->set_offset(execution_plan.offsets().at(2)); + } + } + Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad)); + Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad)); + + BatchMatMulParams params; + params.adj_x = node->adj_x(); + params.adj_y = node->adj_y(); + + return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp new file mode 100644 index 000000000..ac6ebb30f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchToSpaceND.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *crops = helper.getInputTensor(node->crops()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h new file mode 100644 index 000000000..eab284008 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H +#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H + +#include "loader/KernelBuilderHelper.h" + +#include "luci/IR/CircleNodes.h" + +namespace luci_interpreter +{ + +#define REGISTER_KERNEL(name) \ + std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \ + KernelBuilderHelper &helper); + +#include "KernelsToBuild.lst" + +#undef REGISTER_KERNEL + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp new file mode 100644 index 000000000..a16354c96 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Cast.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node); + + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Cast>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp new file mode 100644 index 000000000..ba2564ea2 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Concatenation.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node); + std::vector<const Tensor *> inputs(node->numValues()); + for (uint32_t i = 0; i < node->numValues(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + ConcatenationParams params{}; + params.axis = node->axis(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp new file mode 100644 index 000000000..218165e20 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Conv2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + Conv2DParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp new file mode 100644 index 000000000..174946367 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/DepthToSpace.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + DepthToSpaceParams params{}; + params.block_size = node->block_size(); + + return std::make_unique<kernels::DepthToSpace>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp new file mode 100644 index 000000000..8af1e3b58 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/DepthwiseConv2D.h" +#include <luci/Plan/CircleNodeExecutionPlan.h> + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + DepthwiseConv2DParams params{}; + params.padding = node->padding(); + params.depth_multiplier = node->depthMultiplier(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + // It is unknown what data will be stored in scratchpad tensor, + // using UINT8 as a most general option + auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, ""); + scratchpad->set_observable(false); + scratchpad->set_data_buffer(nullptr); + // If node has execution plan then read memory offsets for scratchpad temporary tensor + // from the beginning of shared memory buffer. + // Used in Static Memory Manager. + // TODO move tensors offset initialization to one place + if (luci::has_execution_plan(node)) + { + const auto execution_plan = luci::get_execution_plan(node); + // Check whether the offset for the current CircleConv2D temporary was found. + if (execution_plan.offsets().size() > 1) + // If this is true, then we keep this offset in scratchpad. 
+ scratchpad->set_offset(execution_plan.offsets().at(1)); + } + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad)); + + return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp new file mode 100644 index 000000000..787322e9b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Dequantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Dequantize>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp new file mode 100644 index 000000000..0611dfdab --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Div.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node); + assert(node->arity() == 2); + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + DivParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Div>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp new file mode 100644 index 000000000..a79985e3b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Elu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Elu>(input, output); +} +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp new file mode 100644 index 000000000..59692883f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Equal.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) + +{ + const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Equal>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp new file mode 100644 index 000000000..30d11cb89 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Exp.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Exp>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp new file mode 100644 index 000000000..9840c34e5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ExpandDims.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::ExpandDims>(input, axis, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp new file mode 100644 index 000000000..3aefdf1c5 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Fill.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node); + assert(node->arity() == 2); + + const auto dims = helper.getInputTensor(node->dims()); + const auto value = helper.getInputTensor(node->value()); + auto output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Fill>(dims, value, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp new file mode 100644 index 000000000..e0a223116 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Floor.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Floor>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp new file mode 100644 index 000000000..a45d89e38 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FloorDiv.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::FloorDiv>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp new file mode 100644 index 000000000..b7b742b8a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FullyConnected.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *weights = helper.getInputTensor(node->weights()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + FullyConnectedParams params{}; + params.activation = node->fusedActivationFunction(); + params.keep_num_dims = node->keep_num_dims(); + + return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp new file mode 100644 index 000000000..2ee2906e0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Gather.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node); + assert(node->arity() == 2); + + const Tensor *params = helper.getInputTensor(node->params()); + const Tensor *indices = helper.getInputTensor(node->indices()); + Tensor *output = helper.getOutputTensor(node); + + GatherParams gparams{}; + gparams.axis = node->axis(); + // TODO support batch_dims + gparams.batch_dims = 0; + + return std::make_unique<kernels::Gather>(params, indices, output, gparams); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp new file mode 100644 index 000000000..80aa63cf0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Greater.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Greater>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp new file mode 100644 index 000000000..272f2843b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/GreaterEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::GreaterEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp new file mode 100644 index 000000000..3ac7d4941 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/If.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node); + assert(node->arity() == 1 + node->input_count()); + assert(output_nodes.size() == static_cast<size_t>(node->output_count())); + + const Tensor *cond = helper.getInputTensor(node->cond()); + std::vector<const Tensor *> inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph()); + RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph()); + + return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph, + else_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp new file mode 100644 index 000000000..06031e5bc --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/InstanceNorm.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *gamma = helper.getInputTensor(node->gamma()); + const Tensor *beta = helper.getInputTensor(node->beta()); + + Tensor *output = helper.getOutputTensor(node); + + InstanceNormParams params{}; + params.epsilon = node->epsilon(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp new file mode 100644 index 000000000..6e22e6d4e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Normalize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + L2NormParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::L2Normalize>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp new file mode 100644 index 000000000..95b55896f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Pool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::L2Pool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp new file mode 100644 index 000000000..bbf5067b1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LeakyRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + LeakyReluParams params{}; + params.alpha = node->alpha(); + + return std::make_unique<kernels::LeakyRelu>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp new file mode 100644 index 000000000..ae914ecc9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Less.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Less>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp new file mode 100644 index 000000000..f1b424b55 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LessEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LessEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp new file mode 100644 index 000000000..962ca2d7c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LocalResponseNormalization.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> +build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + LocalResponseNormalizationParams params{}; + params.radius = node->radius(); + params.bias = node->bias(); + params.alpha = node->alpha(); + params.beta = node->beta(); + + return std::make_unique<kernels::LocalResponseNormalization>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp new file mode 100644 index 000000000..432204115 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogSoftmax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogSoftmax>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp new file mode 100644 index 000000000..bf3cb671a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalAnd.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalAnd>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp new file mode 100644 index 000000000..fefcd9a06 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalNot.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalNot>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp new file mode 100644 index 000000000..a416cb401 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalOr.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalOr>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp new file mode 100644 index 000000000..4a69deef1 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Logistic.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Logistic>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp new file mode 100644 index 000000000..f66a206ca --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/MaxPool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::MaxPool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp new file mode 100644 index 000000000..d0bff776a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Maximum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Maximum>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp new file mode 100644 index 000000000..0dec63e79 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Mean.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + auto temp_sum_unique = + std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, ""); + temp_sum_unique->set_observable(false); + temp_sum_unique->set_data_buffer(nullptr); + Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp new file mode 100644 index 000000000..1a49c1090 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics 
Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Minimum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Minimum>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp new file mode 100644 index 000000000..b221b4574 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/MirrorPad.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + MirrorPadParams params{}; + params.mode = node->mode(); + + return std::make_unique<kernels::MirrorPad>(input, paddings, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp new file mode 100644 index 000000000..f9984853a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Mul.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + MulParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Mul>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp new file mode 100644 index 000000000..9a9ecf991 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Neg.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Neg>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp new file mode 100644 index 000000000..3916a5854 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/NotEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::NotEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp new file mode 100644 index 000000000..a40160945 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/OneHot.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node); + assert(node->arity() == 4); + + const Tensor *indices = helper.getInputTensor(node->indices()); + const Tensor *depth = helper.getInputTensor(node->depth()); + const Tensor *on_value = helper.getInputTensor(node->on_value()); + const Tensor *off_value = helper.getInputTensor(node->off_value()); + Tensor *output = helper.getOutputTensor(node); + + OneHotParams params{}; + params.axis = node->axis(); + + return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp new file mode 100644 index 000000000..f3d700c95 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/PRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *alpha = helper.getInputTensor(node->alpha()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::PRelu>(input, alpha, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp new file mode 100644 index 000000000..efc5850e0 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pack.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node); + assert(node->arity() == node->values_count()); + + std::vector<const Tensor *> inputs(node->values_count()); + for (uint32_t i = 0; i < node->values_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + PackParams params{}; + params.axis = node->axis(); + params.values_count = node->values_count(); + + return std::make_unique<kernels::Pack>(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp new file mode 100644 index 000000000..67ce997a7 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pad.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Pad>(input, paddings, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp new file mode 100644 index 000000000..e378a972a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/PadV2.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + const Tensor *constant_values = helper.getInputTensor(node->constant_values()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp new file mode 100644 index 000000000..d32fc3dbb --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Pow.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Pow>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp new file mode 100644 index 000000000..cb36fb6da --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Quantize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Quantize>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp new file mode 100644 index 000000000..1d64c1c4e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Relu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Relu>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp new file mode 100644 index 000000000..e50cd2545 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Relu6.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Relu6>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp new file mode 100644 index 000000000..76ddd88a3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Reshape.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->tensor()); + const Tensor *shape = helper.getInputTensor(node->shape()); + Tensor *output = helper.getOutputTensor(node); + + // NOTE 'newShape' attribute is ignored. + return std::make_unique<kernels::Reshape>(input, shape, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp new file mode 100644 index 000000000..dc2b88ad3 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ResizeBilinear.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *size = helper.getInputTensor(node->size()); + Tensor *output = helper.getOutputTensor(node); + + ResizeBilinearParams params{}; + params.align_corners = node->align_corners(); + params.half_pixel_centers = node->half_pixel_centers(); + + return std::make_unique<kernels::ResizeBilinear>(input, size, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp new file mode 100644 index 000000000..c7058ae78 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ResizeNearestNeighbor.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> +build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *size = helper.getInputTensor(node->size()); + Tensor *output = helper.getOutputTensor(node); + + ResizeNearestNeighborParams params{}; + params.align_corners = node->align_corners(); + // TODO update half_pixel_centers after CircleResizeNearestNeighbor updated + // Current CircleResizeNearestNeighbor don't have half_pixel_centers. + // default value on current is false. + // it need to be updated when CircleResizeNearestNeighbor updated. + params.half_pixel_centers = false; + + return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp new file mode 100644 index 000000000..c1a7f5350 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/ReverseV2.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->tensor()); + const Tensor *axes = helper.getInputTensor(node->axis()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::ReverseV2>(input, axes, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp new file mode 100644 index 000000000..0714a5dba --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Rsqrt.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Rsqrt>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp new file mode 100644 index 000000000..d172ef438 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SVDF.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node); + assert(node->arity() == 5); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *feature = helper.getInputTensor(node->weight_feature()); + const Tensor *time = helper.getInputTensor(node->weight_time()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state()); + Tensor *output = helper.getOutputTensor(node); + + auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(), + Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + DataType data_type = input->element_type() == DataType::S8 ? 
DataType::S32 : DataType::FLOAT32; + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + if (data_type == DataType::FLOAT32 && + (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8)) + { + data_type = feature->element_type(); + } + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + data_type = DataType::FLOAT32; + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, ""); + scratchpad_tensor->set_observable(false); + scratchpad_tensor->set_data_buffer(nullptr); + Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor)); + + SVDFParams params{}; + 
params.activation = node->fusedActivationFunction(); + params.svdf_rank = node->svdf_rank(); + params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs(); + + return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output, + tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp new file mode 100644 index 000000000..d1edbc794 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Shape.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node); + assert(node->arity() == 1); + + const auto input = helper.getInputTensor(node->input()); + auto output = helper.getOutputTensor(node); + + ShapeParams shape_params{}; + shape_params.out_type = node->out_type(); + + return std::make_unique<kernels::ShapeKernel>(input, output, shape_params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp new file mode 100644 index 000000000..60ac6417c --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Slice.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *size = helper.getInputTensor(node->size()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Slice>(input, begin, size, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp new file mode 100644 index 000000000..f41f63f6f --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Softmax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + SoftmaxParams params{}; + params.beta = node->beta(); + + return std::make_unique<kernels::Softmax>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp new file mode 100644 index 000000000..b6e6cf516 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SpaceToBatchND.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp new file mode 100644 index 000000000..63fdb95ec --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SpaceToDepth.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + + Tensor *output = helper.getOutputTensor(node); + + SpaceToDepthParams params{}; + params.block_size = node->block_size(); + + return std::make_unique<kernels::SpaceToDepth>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp new file mode 100644 index 000000000..3f6d4a7df --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Split.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node); + assert(node->arity() == 2); + assert(output_nodes.size() == static_cast<size_t>(node->num_split())); + + const Tensor *axis = helper.getInputTensor(node->split_dim()); + const Tensor *input = helper.getInputTensor(node->input()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + // NOTE 'num_splits' attribute is ignored. + return std::make_unique<kernels::Split>(axis, input, std::move(outputs)); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp new file mode 100644 index 000000000..0788822ca --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SplitV.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node); + assert(node->arity() == 3); + assert(output_nodes.size() == static_cast<size_t>(node->num_split())); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *sizes_data = helper.getInputTensor(node->size_splits()); + const Tensor *axis = helper.getInputTensor(node->split_dim()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + // NOTE 'num_splits' attribute is ignored. + return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs)); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp new file mode 100644 index 000000000..b9843fe0b --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sqrt.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Sqrt>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp new file mode 100644 index 000000000..0ad7c1772 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Square.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Square>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp new file mode 100644 index 000000000..e4c6fd851 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SquaredDifference.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::SquaredDifference>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp new file mode 100644 index 000000000..6885f8077 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Squeeze.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + SqueezeParams params{}; + params.squeeze_dims = node->squeeze_dims(); + + return std::make_unique<kernels::Squeeze>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp new file mode 100644 index 000000000..359b4e3e9 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/StridedSlice.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node); + assert(node->arity() == 4); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *end = helper.getInputTensor(node->end()); + const Tensor *strides = helper.getInputTensor(node->strides()); + + Tensor *output = helper.getOutputTensor(node); + + StridedSliceParams params{}; + params.begin_mask = node->begin_mask(); + params.ellipsis_mask = node->ellipsis_mask(); + params.end_mask = node->end_mask(); + params.new_axis_mask = node->new_axis_mask(); + params.shrink_axis_mask = node->shrink_axis_mask(); + + return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp new file mode 100644 index 000000000..a6252cb53 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Sub.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + SubParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Sub>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp new file mode 100644 index 000000000..a58ef60a8 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Tanh.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Tanh>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp new file mode 100644 index 000000000..ea17d8311 --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Transpose.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->a()); + const Tensor *perm = helper.getInputTensor(node->perm()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Transpose>(input, perm, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp new file mode 100644 index 000000000..d773e301e --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/TransposeConv.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node); + assert(node->arity() == 4); + + const Tensor *input_sizes = helper.getInputTensor(node->inputSizes()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *out_backprop = helper.getInputTensor(node->outBackprop()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + + Tensor *output = helper.getOutputTensor(node); + + DataType scratch_data_type = + helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + + auto scratch_tensor = + std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, ""); + scratch_tensor->set_observable(false); + scratch_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor)); + + TransposeConvParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + + return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output, + tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp new file mode 100644 index 000000000..a1c0d323a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Unpack.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node); + auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node); + assert(node->arity() == 1); + assert(output_nodes.size() == static_cast<size_t>(node->num())); + + const Tensor *input = helper.getInputTensor(node->value()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + UnpackParams params{}; + params.axis = node->axis(); + + // NOTE 'num' attribute is ignored. + return std::make_unique<kernels::Unpack>(input, std::move(outputs), params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp new file mode 100644 index 000000000..8fde6ec8a --- /dev/null +++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/While.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node); + + auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node); + assert(node->arity() == node->input_count()); + assert(output_nodes.size() == static_cast<size_t>(node->output_count())); + + std::vector<const Tensor *> inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph()); + RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph()); + + return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph, + body_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt index 7953359ad..d3048264d 100644 --- a/compiler/luci-micro/standalone/CMakeLists.txt +++ b/compiler/luci-micro/standalone/CMakeLists.txt @@ -7,6 +7,9 @@ set(BUILD_WHITELIST "dummy") add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc) set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler") +nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET) + +include_directories(${FlatBuffersSource_DIR}/include) 
add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco) add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor) @@ -14,7 +17,21 @@ add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops) add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str) add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo) add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes-std ${CMAKE_CURRENT_BINARY_DIR}/hermes-std) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes ${CMAKE_CURRENT_BINARY_DIR}/hermes) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-strcast ${CMAKE_CURRENT_BINARY_DIR}/pepper-strcast) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/foder ${CMAKE_CURRENT_BINARY_DIR}/foder) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/mio-circle04 ${CMAKE_CURRENT_BINARY_DIR}/mio-circle04) + add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv) add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/import ${CMAKE_CURRENT_BINARY_DIR}/luci/import) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/profile ${CMAKE_CURRENT_BINARY_DIR}/luci/profile) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/env ${CMAKE_CURRENT_BINARY_DIR}/luci/env) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/plan ${CMAKE_CURRENT_BINARY_DIR}/luci/plan) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/log ${CMAKE_CURRENT_BINARY_DIR}/luci/log) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/logex ${CMAKE_CURRENT_BINARY_DIR}/luci/logex) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/locop ${CMAKE_CURRENT_BINARY_DIR}/locop) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/pp ${CMAKE_CURRENT_BINARY_DIR}/pp) -add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter) 
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter) diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt index 034fe5269..3489f1eac 100644 --- a/compiler/luci-pass-value-test/CMakeLists.txt +++ b/compiler/luci-pass-value-test/CMakeLists.txt @@ -17,6 +17,13 @@ macro(addeval RECIPE PASS_OPTION) set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}") set(DASH_PASS_OPTION "--${PASS_OPTION}") + foreach(MORE_OPTIONS ${ARGN}) + list(APPEND DASH_PASS_OPTION "--${MORE_OPTIONS}") + endforeach() + # NOTE if there are two options, 'DASH_PASS_OPTION' will be like '--option_a;--option_b' + # add_custom_command() will translate ';' to two arguments as '--optiona_a --optionb' + # do not use set(DASH_PASS_OPTION "${DASH_PASS_OPTION} --${ARG}")) + # as this will become like '"--optiona_a --optionb"' which is one string argument # Generate optimized .circle add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH} diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst index 67476c644..cdff159e0 100644 --- a/compiler/luci-pass-value-test/test.lst +++ b/compiler/luci-pass-value-test/test.lst @@ -14,6 +14,8 @@ addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv) addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6) addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6) addeval(Net_Conv_Relu6_000 fuse_activation_function) +addeval(Net_Densify_Add_000 fold_densify) +addeval(Net_Dequantize_Add_000 fold_dequantize) addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv) addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv) addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop) @@ -25,10 +27,17 @@ addeval(Net_TConv_Add_002 fuse_add_with_tconv) addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv) addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv) addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv) 
+addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv) +addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv) addeval(Net_InstanceNorm_001 fuse_instnorm) addeval(Net_InstanceNorm_002 fuse_instnorm) addeval(Net_InstanceNorm_003 fuse_instnorm) addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice) +addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul) + +# test for limited support for FLOAT16 +addeval(Net_Dequantize_Add_000 fold_dequantize) +addeval(Net_Densify_Dequantize_Add_000 fold_dequantize fold_densify) # test SignatureDef, with any optimization #addeval(SignatureDef_MultiOut_000 fuse_instnorm) diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst index f62b72919..932da95c5 100644 --- a/compiler/luci-value-test/test.lst +++ b/compiler/luci-value-test/test.lst @@ -161,6 +161,8 @@ addeval(Squeeze_001) addeval(StridedSlice_000) addeval(StridedSlice_001) addeval(StridedSlice_002) +addeval(StridedSlice_003) +addeval(StridedSlice_004) addeval(Sub_000) addeval(Sub_U8_000) #addeval(Sum_000) diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h index 0ff21a34b..7516197c0 100644 --- a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h +++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h @@ -118,6 +118,10 @@ public: return circle::CreateCosOptions(_builder).Union(); } flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; } + flatbuffers::Offset<void> visit(luci::CircleDensify *) + { + return circle::CreateDensifyOptions(_builder).Union(); + } flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node) { return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union(); diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst index 1b6909303..8a75ef706 100644 --- a/compiler/luci/export/src/CircleOps.lst +++ b/compiler/luci/export/src/CircleOps.lst @@ -32,6 
+32,7 @@ CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_C CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions) CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions) CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE) +CIRCLE_NODE(CircleDensify, BuiltinOperator_DENSIFY, BuiltinOptions_DensifyOptions) CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions) CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions) CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions) diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index b3bb850cc..97e81076b 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -434,6 +434,12 @@ flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, l break; } + // NOTE loco::DataType::FLOAT16 is added but we do not export this type + // as backends currently don't support this type. + // currently this is supported only for "Tensor(Float16) - Dequantize" + // sequence so that after 'fold_dequantize' option this Tensor is + // converted to FLOAT32. 
+ INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype())); } diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 1b2db23ae..bc0a00b34 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -18,6 +18,7 @@ target_link_libraries(luci_import PRIVATE luci_log) target_link_libraries(luci_import PRIVATE luci_logex) target_link_libraries(luci_import PRIVATE nncc_common) target_link_libraries(luci_import PRIVATE locop) +target_link_libraries(luci_import PRIVATE foder) target_link_libraries(luci_import PRIVATE oops) target_link_libraries(luci_import PRIVATE mio_circle04_helper) install(TARGETS luci_import DESTINATION lib) diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h index 7a5045ede..a4a6d7ce8 100644 --- a/compiler/luci/import/include/luci/Import/Nodes.h +++ b/compiler/luci/import/include/luci/Import/Nodes.h @@ -35,6 +35,7 @@ #include "Nodes/CircleConv2D.h" #include "Nodes/CircleCos.h" #include "Nodes/CircleCustom.h" +#include "Nodes/CircleDensify.h" #include "Nodes/CircleDepthToSpace.h" #include "Nodes/CircleDepthwiseConv2D.h" #include "Nodes/CircleDequantize.h" diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h new file mode 100644 index 000000000..42bdac1a4 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__ +#define __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__ + +#include "luci/Import/GraphBuilder.h" + +namespace luci +{ + +class CircleDensifyGraphBuilder : public GraphBuilder +{ +public: + bool validate(const ValidateArgs &args) const final; + +private: + CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, + loco::Graph *graph) const final; +}; + +} // namespace luci + +#endif // __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__ diff --git a/compiler/luci/import/include/luci/ImporterEx.h b/compiler/luci/import/include/luci/ImporterEx.h new file mode 100644 index 000000000..852d4c848 --- /dev/null +++ b/compiler/luci/import/include/luci/ImporterEx.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_IMPORTER_EX_H__ +#define __LUCI_IMPORTER_EX_H__ + +#include "luci/IR/Module.h" + +#include <memory> +#include <string> + +namespace luci +{ + +class ImporterEx final +{ +public: + ImporterEx() = default; + +public: + std::unique_ptr<Module> importVerifyModule(const std::string &input_path) const; +}; + +} // namespace luci + +#endif // __LUCI_IMPORTER_EX_H__ diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp index fe2d830e9..d3b52aadb 100644 --- a/compiler/luci/import/src/GraphBuilderRegistry.cpp +++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp @@ -44,6 +44,7 @@ GraphBuilderRegistry::GraphBuilderRegistry() CIRCLE_NODE(CONCATENATION, CircleConcatenationGraphBuilder); // 2 CIRCLE_NODE(CONV_2D, CircleConv2DGraphBuilder); // 3 CIRCLE_NODE(COS, CircleCosGraphBuilder); // 108 + CIRCLE_NODE(DENSIFY, CircleDensifyGraphBuilder); // 124 CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceGraphBuilder); // 5 CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DGraphBuilder); // 4 CIRCLE_NODE(DEQUANTIZE, CircleDequantizeGraphBuilder); // 6 @@ -160,7 +161,6 @@ GraphBuilderRegistry::GraphBuilderRegistry() // BuiltinOperator_DELEGATE = 51, // BuiltinOperator_ARG_MAX = 56, // BuiltinOperator_HARD_SWISH = 117, - // BuiltinOperator_DENSIFY = 124, // Register builders for nodes which not handles in builders registered above. #define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>()) diff --git a/compiler/luci/import/src/ImporterEx.cpp b/compiler/luci/import/src/ImporterEx.cpp new file mode 100644 index 000000000..db585fd4d --- /dev/null +++ b/compiler/luci/import/src/ImporterEx.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Importer.h" +#include "luci/ImporterEx.h" + +#include <foder/FileLoader.h> + +#include <memory> +#include <iostream> + +namespace luci +{ + +std::unique_ptr<Module> ImporterEx::importVerifyModule(const std::string &input_path) const +{ + foder::FileLoader file_loader{input_path}; + std::vector<char> model_data; + + try + { + model_data = file_loader.load(); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + return nullptr; + } + + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; + return nullptr; + } + + const circle::Model *circle_model = circle::GetModel(model_data.data()); + if (circle_model == nullptr) + { + std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; + return nullptr; + } + + Importer importer; + return importer.importModule(circle_model); +} + +} // namespace luci diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp index a4f190dd9..88f2ae3d0 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -166,6 +166,10 @@ CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index, copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node); break; + case loco::DataType::FLOAT16: + copy_data<loco::DataType::FLOAT16>(buffer, num_elements, 
const_node); + break; + case loco::DataType::U8: copy_data<loco::DataType::U8>(buffer, num_elements, const_node); break; diff --git a/compiler/luci/import/src/Nodes/CircleDensify.cpp b/compiler/luci/import/src/Nodes/CircleDensify.cpp new file mode 100644 index 000000000..0a4b2186f --- /dev/null +++ b/compiler/luci/import/src/Nodes/CircleDensify.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Import/Nodes/CircleDensify.h" + +#include <luci/IR/Nodes/CircleDensify.h> + +#include <loco.h> + +namespace luci +{ + +bool CircleDensifyGraphBuilder::validate(const ValidateArgs &args) const +{ + return GraphBuilder::validate(args, 1); +} + +CircleNode *CircleDensifyGraphBuilder::build_node(const circle::OperatorT &, + const std::vector<CircleNode *> &inputs, + loco::Graph *graph) const +{ + auto *node = graph->nodes()->create<CircleDensify>(); + node->input(inputs.at(0)); + + // No options for Densify + + return node; +} + +} // namespace luci diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h index d89ea03cc..901f1cbca 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.h +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h @@ -32,6 +32,7 @@ #include "Nodes/CircleConv2D.h" #include "Nodes/CircleCos.h" #include "Nodes/CircleCustom.h" +#include "Nodes/CircleDensify.h" #include "Nodes/CircleDepthToSpace.h" #include "Nodes/CircleDepthwiseConv2D.h" #include "Nodes/CircleDequantize.h" diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst index 1472008df..f227a03f5 100644 --- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst +++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst @@ -30,6 +30,7 @@ CIRCLE_NODE(CONCATENATION, CircleConcatenation) CIRCLE_NODE(CONV_2D, CircleConv2D) CIRCLE_NODE(COS, CircleCos) CIRCLE_NODE(CUSTOM, CircleCustom) +CIRCLE_NODE(DENSIFY, CircleDensify) CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpace) CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2D) CIRCLE_NODE(DEQUANTIZE, CircleDequantize) diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h new file mode 100644 index 000000000..7acad0341 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_CIRCLE_DENSIFY_H__ +#define __LUCI_IR_CIRCLE_DENSIFY_H__ + +#include "luci/IR/CircleNodeDecl.h" +#include "luci/IR/CircleOpcode.h" + +#include "luci/IR/CircleNodeMixins.h" + +namespace luci +{ + +/** + * @brief DENSIFY in Circle + */ +class CircleDensify final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DENSIFY>> +{ +public: + loco::Node *input(void) const { return at(0)->node(); } + void input(loco::Node *node) { at(0)->node(node); } +}; + +} // namespace luci + +#endif // __LUCI_IR_CIRCLE_DENSIFY_H__ diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp index c2d82c8a2..a4854ec59 100644 --- a/compiler/luci/lang/src/Nodes/CircleConst.cpp +++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp @@ -77,6 +77,7 @@ INSTANTIATE(loco::DataType::S8); INSTANTIATE(loco::DataType::FLOAT32); INSTANTIATE(loco::DataType::U8); INSTANTIATE(loco::DataType::BOOL); +INSTANTIATE(loco::DataType::FLOAT16); #undef INSTANTIATE diff --git a/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp new file mode 100644 index 000000000..ae83784a5 --- /dev/null +++ b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/IR/Nodes/CircleDensify.h" + +#include "luci/IR/CircleDialect.h" +#include "luci/IR/CircleNodeVisitor.h" + +#include <gtest/gtest.h> + +TEST(CircleDensifyTest, constructor) +{ + luci::CircleDensify densify_node; + + ASSERT_EQ(luci::CircleDialect::get(), densify_node.dialect()); + ASSERT_EQ(luci::CircleOpcode::DENSIFY, densify_node.opcode()); + + ASSERT_EQ(nullptr, densify_node.input()); +} + +TEST(CircleDensifyTest, input_NEG) +{ + luci::CircleDensify densify_node; + luci::CircleDensify node; + + densify_node.input(&node); + ASSERT_NE(nullptr, densify_node.input()); + + densify_node.input(nullptr); + ASSERT_EQ(nullptr, densify_node.input()); +} + +TEST(CircleDensifyTest, arity_NEG) +{ + luci::CircleDensify densify_node; + + ASSERT_NO_THROW(densify_node.arg(0)); + ASSERT_THROW(densify_node.arg(1), std::out_of_range); +} + +TEST(CircleDensifyTest, visit_mutable_NEG) +{ + struct TestVisitor final : public luci::CircleNodeMutableVisitor<void> + { + }; + + luci::CircleDensify densify_node; + + TestVisitor tv; + ASSERT_THROW(densify_node.accept(&tv), std::exception); +} + +TEST(CircleDensifyTest, visit_NEG) +{ + struct TestVisitor final : public luci::CircleNodeVisitor<void> + { + }; + + luci::CircleDensify densify_node; + + TestVisitor tv; + ASSERT_THROW(densify_node.accept(&tv), std::exception); +} diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp 
b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp index eff0830b4..8409f250e 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp @@ -137,6 +137,7 @@ CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node) CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder) CIRCLE_NODE(COS, CircleCosSummaryBuilder) CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder) + CIRCLE_NODE(DENSIFY, CircleDensifySummaryBuilder) CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder) CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder) CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder) diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp index 6df9270e3..48e4579ea 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp +++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp @@ -374,6 +374,22 @@ void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction())); } +void CircleConstSummaryBuilder::build_attributes(const luci::CircleNode *node, + locop::NodeSummary &s) +{ + auto circonst = loco::must_cast<const luci::CircleConst *>(node); + s.args().append("dtype", to_str(circonst->dtype())); + s.args().append("rank", std::to_string(circonst->rank())); + std::string shape; + for (uint32_t r = 0; r < circonst->rank(); ++r) + { + if (!shape.empty()) + shape += " "; + shape += std::to_string(circonst->dim(r).value()); + } + s.args().append("shape", "[" + shape + "]"); +} + void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s) { s.state(locop::NodeDesc::State::PartiallyKnown); diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h index 6cd24b7f1..f0cac4e5e 100644 --- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h +++ 
b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h @@ -167,6 +167,7 @@ private: class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder { private: + void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); void update_status(locop::NodeSummary &s); }; @@ -189,6 +190,10 @@ private: void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s); }; +class CircleDensifySummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder +{ +}; + class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder { private: diff --git a/compiler/luci/partition/src/ConnectNode.h b/compiler/luci/partition/include/luci/ConnectNode.h index e60567c69..2d9d41d77 100644 --- a/compiler/luci/partition/src/ConnectNode.h +++ b/compiler/luci/partition/include/luci/ConnectNode.h @@ -77,6 +77,7 @@ public: void visit(const luci::CircleConv2D *) final; void visit(const luci::CircleCos *) final; void visit(const luci::CircleCustom *) final; + void visit(const luci::CircleDensify *) final; void visit(const luci::CircleDepthToSpace *) final; void visit(const luci::CircleDepthwiseConv2D *) final; void visit(const luci::CircleDequantize *) final; diff --git a/compiler/luci/partition/src/ConnectNode.cpp b/compiler/luci/partition/src/ConnectNode.cpp index 336be7c57..3d8c211c0 100644 --- a/compiler/luci/partition/src/ConnectNode.cpp +++ b/compiler/luci/partition/src/ConnectNode.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include <oops/UserExn.h> diff --git a/compiler/luci/partition/src/ConnectNode.test.h b/compiler/luci/partition/src/ConnectNode.test.h index ac4878a15..18bb52a20 100644 --- a/compiler/luci/partition/src/ConnectNode.test.h +++ b/compiler/luci/partition/src/ConnectNode.test.h @@ -17,7 +17,7 @@ #ifndef __CONNECT_NODE_TEST_H__ #define __CONNECT_NODE_TEST_H__ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include <luci/Service/CircleNodeClone.h> #include <luci/test/TestIOGraph.h> diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.cpp index a3fde4c45..a7fbc37d1 100644 --- a/compiler/luci/partition/src/Nodes/CircleAbs.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAbs.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp index f3e721525..ac805c1af 100644 --- a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.cpp index d393997e9..0754be626 100644 --- a/compiler/luci/partition/src/Nodes/CircleAdd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAdd.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp index e457b83d2..99ae52c54 100644 --- a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.cpp index 81e5e0949..90aaeee3a 100644 --- a/compiler/luci/partition/src/Nodes/CircleAddN.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAddN.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp index 5d0a7489f..37743d3a3 100644 --- a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp index 1409586d7..99b30d38f 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp index c816fbeb8..77248e07e 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp index 6151aa98a..1bb3d84e7 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp index d150be4d6..ed0cf030c 100644 --- a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp index 547665771..1df86c7be 100644 --- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp index fba2be835..266120b92 100644 --- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp index 5b1dd8543..6d50f0e31 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp index 3d64f4b29..2191f5b0a 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp index 90c4d9ef3..a9e810a27 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp index bbbd3f157..0324d85e0 100644 --- a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp index c3992a64e..5a459e78c 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp index 94336d36a..e6d26a6a1 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp index 2a463afb1..40b8f7052 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp index 544f5e127..e9cb350b8 100644 --- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCast.cpp b/compiler/luci/partition/src/Nodes/CircleCast.cpp index f7630cd85..e1301aa06 100644 --- a/compiler/luci/partition/src/Nodes/CircleCast.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCast.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp index 005119060..d7b679aa2 100644 --- a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.cpp index a0c94033e..e7b5f5a3f 100644 --- a/compiler/luci/partition/src/Nodes/CircleCeil.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCeil.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp index dbd7e5390..cb0364844 100644 --- a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp index fb24d21ca..d895685f0 100644 --- a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp index 4d64b85a2..b5c05e25d 100644 --- a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleConst.cpp b/compiler/luci/partition/src/Nodes/CircleConst.cpp index 118cd8de2..b88f5ef4e 100644 --- a/compiler/luci/partition/src/Nodes/CircleConst.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConst.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp index 46716f0ec..ca9cce18f 100644 --- a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp index 829adec9b..4596d9618 100644 --- a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCos.cpp b/compiler/luci/partition/src/Nodes/CircleCos.cpp index 9dcf81e83..76b1baac3 100644 --- a/compiler/luci/partition/src/Nodes/CircleCos.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCos.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp index 6c92b93fb..ba806a3f9 100644 --- a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.cpp index ac16ebe40..cc1604876 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustom.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustom.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp index 9f40b5220..f7fe86674 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp index fee1a1a8c..0d83cffaa 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp index 0a293970e..ddd4e93f2 100644 --- a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.cpp new file mode 100644 index 000000000..cfb236a5d --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleDensify.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/ConnectNode.h" + +namespace +{ + +void connect(luci::ConnectNode *cn, const luci::CircleDensify *node) +{ + auto *cloned = loco::must_cast<luci::CircleDensify *>(cn->find_clone(node)); + + luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input()); + + cloned->input(cn->find_clone(input)); +} + +} // namespace + +namespace luci +{ + +void ConnectNode::visit(const luci::CircleDensify *node) { connect(this, node); } + +} // namespace luci diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp new file mode 100644 index 000000000..94076a8db --- /dev/null +++ b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/ConnectNode.h" + +#include "ConnectNode.test.h" + +#include <luci/Service/CircleNodeClone.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +class NodeGraphlet : public NodeGraphletT<luci::CircleDensify> +{ +public: + NodeGraphlet() = default; +}; + +class TestNodeGraph : public TestIOGraph, public NodeGraphlet +{ +public: + TestNodeGraph() = default; + +public: + void init(const ShapeU32 shape) + { + TestIOGraph::init(shape, shape); + NodeGraphlet::init(g()); + + node()->input(input()); + + output()->from(node()); + } +}; + +} // namespace + +TEST(ConnectNodeTest, connect_Densify) +{ + TestNodeGraph tng; + tng.init({2, 3}); + + ConnectionTestHelper cth; + cth.prepare_inputs(&tng); + + auto *node = tng.node(); + ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node)); + + auto *clone = luci::clone_node(node, cth.graph_clone()); + ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone)); + + cth.clone_connect(node, clone); + + ASSERT_EQ(1, clone->arity()); + ASSERT_EQ(cth.inputs(0), clone->arg(0)); +} + +TEST(ConnectNodeTest, connect_Densify_NEG) +{ + TestNodeGraph tng; + tng.init({2, 3}); + + ConnectionTestHelper cth; + cth.prepare_inputs_miss(&tng); + + auto *node = tng.node(); + ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node)); + + auto *clone = luci::clone_node(node, cth.graph_clone()); + ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone)); + + EXPECT_ANY_THROW(cth.clone_connect(node, clone)); +} diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp index ade266e41..c044b4c42 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp index 997360a9b..1b61a3517 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp index 19d1d5f42..2bd9ab5ca 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp index 681f98bdb..02976a488 100644 --- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp index 3a520d4e9..ac2642bc1 100644 --- a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp index 7f6006c1d..d3a43d374 100644 --- a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.cpp index 480338542..8941a4196 100644 --- a/compiler/luci/partition/src/Nodes/CircleDiv.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDiv.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp index 226932337..7900beafc 100644 --- a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleElu.cpp b/compiler/luci/partition/src/Nodes/CircleElu.cpp index d21cd4c01..b77226574 100644 --- a/compiler/luci/partition/src/Nodes/CircleElu.cpp +++ b/compiler/luci/partition/src/Nodes/CircleElu.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp index 94774cca8..20b205048 100644 --- a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.cpp index 6a126c0e2..2dc0e759b 100644 --- a/compiler/luci/partition/src/Nodes/CircleEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp index 20b539199..c0d3bd915 100644 --- a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleExp.cpp b/compiler/luci/partition/src/Nodes/CircleExp.cpp index 95fb1cd67..c1da7908a 100644 --- a/compiler/luci/partition/src/Nodes/CircleExp.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExp.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp index 16d7244ab..286f205bf 100644 --- a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp index 6fccd6310..a6ce6495c 100644 --- a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp index 8a5156509..37af10f52 100644 --- a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp index 4855d80ae..5dfaee1b5 100644 --- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp index 3821d755a..2a2ec0cff 100644 --- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFill.cpp b/compiler/luci/partition/src/Nodes/CircleFill.cpp index 06fca7b41..32688cd9b 100644 --- a/compiler/luci/partition/src/Nodes/CircleFill.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFill.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp index 97a5a348d..4b3872a80 100644 --- a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.cpp index 7ad392461..f7409a221 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloor.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloor.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp index 1a964ea21..883d36256 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp index 3b92b00c6..57e435c23 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp index 3d2801566..1eb603c5d 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp index 9f868d0e5..1b942d200 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp index 89a09411b..680bf1680 100644 --- a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp index da273037a..206b47aec 100644 --- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp index fc88204bd..39eea5571 100644 --- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGather.cpp b/compiler/luci/partition/src/Nodes/CircleGather.cpp index 0ee458394..4f059cbe4 100644 --- a/compiler/luci/partition/src/Nodes/CircleGather.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGather.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp index 7f4e08435..f427e0456 100644 --- a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp index 4be05ca94..6a9c3b47f 100644 --- a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp index d673698e1..0207e917d 100644 --- a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.cpp index 7bc2a14c9..9f4b18fde 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreater.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreater.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp index 842370d42..61d1f5957 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp index 536a0aed6..76130a843 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp index 76dc770f8..7e4e1ef74 100644 --- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleIf.cpp b/compiler/luci/partition/src/Nodes/CircleIf.cpp index 1672a136d..45e4ec48b 100644 --- a/compiler/luci/partition/src/Nodes/CircleIf.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIf.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp index dbd25c822..cbb766221 100644 --- a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp index 969bdd93c..2eb5dda1f 100644 --- a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp index 9207654bc..ec2dde3b2 100644 --- a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp index 386652fb1..f64ffd8b4 100644 --- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp +++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp index b932223d0..4363c6c18 100644 --- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp index 61ddba264..df26930ec 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp index 4fc23727a..b114a15f0 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp index 24333d507..1eacddb62 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp index 40328488c..22f99d5ef 100644 --- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp index 3da1ba287..1702ddeb1 100644 --- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp index 5a0d1dd87..71dc55ea0 100644 --- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLess.cpp b/compiler/luci/partition/src/Nodes/CircleLess.cpp index aab495fcc..52726f9be 100644 --- a/compiler/luci/partition/src/Nodes/CircleLess.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLess.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp index ab65e5d18..c5d194efe 100644 --- a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp index ec129dbe8..e9a3c412b 100644 --- a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp index 0dd8986b6..29f4ababa 100644 --- a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp index 6b0d1cd12..7a00bf94f 100644 --- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp index e1973387d..5e5723817 100644 --- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLog.cpp b/compiler/luci/partition/src/Nodes/CircleLog.cpp index c43570fa2..676d22fc0 100644 --- a/compiler/luci/partition/src/Nodes/CircleLog.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLog.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp index 8a43f6f01..0a2b97538 100644 --- a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp index de582c80d..c67b08f0f 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp index 1e60bf54c..b6daeb781 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp index 28e8f42e5..1498d85ec 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp index a1189f06f..0b9513626 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp index e2657824c..f9c077e4e 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp index f6b34596e..88dff3651 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp index 418dc023b..59592e41d 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp index fee3f4779..35f8029c0 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp index 7d788512d..804597bed 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp index c4b3f7fe3..241d84040 100644 --- a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp index e92806aff..297e9f2cc 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp index 03e3c3c3e..472cab8c8 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp index 29bb7fe5f..b327aacad 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp index 5503ea18f..4ff797c43 100644 --- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp index 75a665aee..dee90e5c0 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp index 16996497a..949e0d724 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp index 2ba6055b4..459917e3e 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp index 370174c37..e6a6d5741 100644 --- a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMean.cpp b/compiler/luci/partition/src/Nodes/CircleMean.cpp index b634e5838..c704d0054 100644 --- a/compiler/luci/partition/src/Nodes/CircleMean.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMean.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp index 53435d9dc..838d7aea2 100644 --- a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp index cdf757583..8958bf64a 100644 --- a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp index 2fe6b0da6..a6c86a27a 100644 --- a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp index 16a24abf7..91c3cb97a 100644 --- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp index 605a126c9..b837e1012 100644 --- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleMul.cpp b/compiler/luci/partition/src/Nodes/CircleMul.cpp index 2cd2b4038..12e14728c 100644 --- a/compiler/luci/partition/src/Nodes/CircleMul.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMul.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp index 99cf0824d..b316679f8 100644 --- a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.cpp index 413ad4930..e9dcc45cd 100644 --- a/compiler/luci/partition/src/Nodes/CircleNeg.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNeg.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp index bd74a3665..ab13c9416 100644 --- a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp index 63ff3f021..88d72e12f 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp index 2771aef49..e796a14c3 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp index 80e4704b9..61caa3a4c 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp index 5a0a8da8c..eb04f2688 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp index c1f117724..3b0b755a4 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp index 1f20fbb0f..c9c31b315 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp index 69e3cc8e8..3eed260c2 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp index e001b0b0b..2c5822fe3 100644 --- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp index c40c2a21a..29a6a43bb 100644 --- a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp index 360940ca7..2983e1b27 100644 --- a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp index d76f49255..d172fb834 100644 --- a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp index 3c555c290..59780e424 100644 --- a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp index a033e80a8..61d7620aa 100644 --- a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp index 106eb405d..36ce35077 100644 --- a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp +++ b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp index b8a2341c8..6a2325715 100644 --- a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp index e5bcedcf6..f2a2e2c7d 100644 --- a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePack.cpp b/compiler/luci/partition/src/Nodes/CirclePack.cpp index 326881067..d4b49bfa9 100644 --- a/compiler/luci/partition/src/Nodes/CirclePack.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePack.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp index 68c513848..665b137e8 100644 --- a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePad.cpp b/compiler/luci/partition/src/Nodes/CirclePad.cpp index eb2a89c85..0a1d6f7f9 100644 --- a/compiler/luci/partition/src/Nodes/CirclePad.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePad.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp index 24ea83fa3..72f97d6a4 100644 --- a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp index 001fecbcb..969cc271d 100644 --- a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp index aea8e0cce..9829f6269 100644 --- a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CirclePow.cpp b/compiler/luci/partition/src/Nodes/CirclePow.cpp index fb180ee69..ce69e7402 100644 --- a/compiler/luci/partition/src/Nodes/CirclePow.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePow.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp index 7a5be4d13..f4e49c023 100644 --- a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp +++ b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp index 340c1da42..903a94e32 100644 --- a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp +++ b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp index 1f348b45c..5ca1a6baa 100644 --- a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRange.cpp b/compiler/luci/partition/src/Nodes/CircleRange.cpp index f295338d8..fa1a02c71 100644 --- a/compiler/luci/partition/src/Nodes/CircleRange.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRange.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp index 59a95f119..b5b0c8aa8 100644 --- a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRank.cpp b/compiler/luci/partition/src/Nodes/CircleRank.cpp index f7cce762b..35b4764aa 100644 --- a/compiler/luci/partition/src/Nodes/CircleRank.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRank.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp index 74c520bee..5a0a71a7e 100644 --- a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp index ed762dbc6..262e12ac1 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp index 792f51187..45c292073 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp index 09586ecee..d91c78e41 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp index 8fbaf653e..2ad18f339 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp index 105214d0b..65fca6ab3 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp index c37d6248f..db48f54d7 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp index 2fb4e3e01..daac168b2 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp index cc1ac83ad..f5f69f0ff 100644 --- a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.cpp index d3617bdbd..63ac31ba9 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp index ccaf5760b..ec4d10f09 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp index fb9ba6f36..c2956c456 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp index 1341b0e06..e9ecbe2e6 100644 --- a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp index 476195b71..1141297da 100644 --- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp index 7dc63c6ef..ae60a97e5 100644 --- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.cpp index e59670453..49f7c64a7 100644 --- a/compiler/luci/partition/src/Nodes/CircleReshape.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReshape.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp index 73cbbdfcc..198cfa1b6 100644 --- a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp index 0f504015b..41fdedf2a 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp index c2d8b714b..437e448a6 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp index c985b7f51..567db4961 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp index 9cc2e558e..5dc99a385 100644 --- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp index 225d29ea5..348cdbb78 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp index 408fc0c9c..751910326 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp index d59a7de93..4b8c4a444 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp index d41ad8e66..351c6f2c0 100644 --- a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRound.cpp b/compiler/luci/partition/src/Nodes/CircleRound.cpp index 9170bcdd9..97d002870 100644 --- a/compiler/luci/partition/src/Nodes/CircleRound.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRound.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp index fad090476..02f335dc3 100644 --- a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp index 03e64aad0..44abd5ef7 100644 --- a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp index d76b96e14..39ae1f8f3 100644 --- a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp index f661a794c..e2b99c49d 100644 --- a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp index 5fae5206e..af8cd5549 100644 --- a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp index 62912b791..88a3ecf19 100644 --- a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp +++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp index f271f8843..4ce787569 100644 --- a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp index 5fc320a16..6540416c6 100644 --- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp index a6bcff20a..453b7cc01 100644 --- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.cpp index dbe1dd48f..436e95609 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelect.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelect.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp index 912934b8b..2a38de593 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp index 28072c860..a8b6ab556 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp index e8d128e93..c2ebdbe11 100644 --- a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleShape.cpp b/compiler/luci/partition/src/Nodes/CircleShape.cpp index f93cf1458..2fb3dcdd8 100644 --- a/compiler/luci/partition/src/Nodes/CircleShape.cpp +++ b/compiler/luci/partition/src/Nodes/CircleShape.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp index 9b4afdcc2..38033a3bc 100644 --- a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSin.cpp b/compiler/luci/partition/src/Nodes/CircleSin.cpp index 62c776ef6..0ef605994 100644 --- a/compiler/luci/partition/src/Nodes/CircleSin.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSin.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp index fbee6f662..e141b4530 100644 --- a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.cpp index 7895d9ece..811d81f9e 100644 --- a/compiler/luci/partition/src/Nodes/CircleSlice.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSlice.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp index 3c666ad6c..0718c7f15 100644 --- a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp index 0a93787e7..6b08f005e 100644 --- a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp index b25629863..571ad80ff 100644 --- a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp index b94948bee..dc48b36d6 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp index 279e9b232..0fcf22fd0 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp index bd4523ca8..55d562f3d 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp index 207163d08..771c1f372 100644 --- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp index d1ed18818..cc2f5e915 100644 --- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp index 2257186e8..06b3814ee 100644 --- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.cpp index d6d62a8ed..5f851f049 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplit.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplit.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp index d8d0953e0..a4242b9ab 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp index 4021f2042..1a447581e 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp index 85fe2685b..b7cf6fc7d 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp index f13205725..43ebe076f 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp index 3ac1d6c27..877a44759 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp index 2034805cd..4bac6c5dc 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp index 434dfb0ad..b3cf4d939 100644 --- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp index f737aac8d..fd6d0ec05 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp index fa7f7fe2a..be298835e 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.cpp index 1476a8694..56dd5440d 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquare.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquare.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp index bb6a7c33f..a509b31b5 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp index 40dd31706..e47be2c7e 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp index 9cfe9eefb..a900f1dc3 100644 --- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp index bc9fda296..ffe3c911b 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp index 1f0971043..7a6e2bf44 100644 --- a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp index 3bdca8a8a..953b45107 100644 --- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp +++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp index 130ff9159..3e950fd25 100644 --- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSub.cpp b/compiler/luci/partition/src/Nodes/CircleSub.cpp index 8ac294b7b..c5bea087f 100644 --- a/compiler/luci/partition/src/Nodes/CircleSub.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSub.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp index 7c0d83745..ca51865a7 100644 --- a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleSum.cpp b/compiler/luci/partition/src/Nodes/CircleSum.cpp index bef1d4676..e929fd090 100644 --- a/compiler/luci/partition/src/Nodes/CircleSum.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSum.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp index 1ed65c04f..21f6bbb74 100644 --- a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.cpp index e6c56ebf7..ef5c2c993 100644 --- a/compiler/luci/partition/src/Nodes/CircleTanh.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTanh.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp index 17cd48731..1e2d0629c 100644 --- a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTile.cpp b/compiler/luci/partition/src/Nodes/CircleTile.cpp index 0381b4dac..0c217436e 100644 --- a/compiler/luci/partition/src/Nodes/CircleTile.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTile.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp index 79d1ba16c..9449c1fa7 100644 --- a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp index ce8a6f5df..41dfa9c22 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp index f08f3f315..e0c4a3a84 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp index 6ca6e3d29..19f0fa7bf 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp index a5c1c43f7..ba085f6a9 100644 --- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp index 1cbb54666..cbbdb0090 100644 --- a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp index b3b16307c..847683844 100644 --- a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp index 469cc9a1a..6b6819d59 100644 --- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp index ee9fb0e78..68adaad81 100644 --- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp index 3f0374aac..332301455 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp index aeefef093..2630461ae 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.cpp index 79ca59466..c035b7ed7 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnique.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnique.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp index 23f299840..910087a8b 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp index f244dd6eb..23b1abaa5 100644 --- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp index 887640790..954957497 100644 --- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp index f83c5d810..43ebcb418 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp index b164cc3bc..444b04373 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp index b8982fff5..ee1de153f 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp index 9ed440966..2aaef8d04 100644 --- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp index f7f6f21fd..e7a794a16 100644 --- a/compiler/luci/partition/src/Nodes/CircleVariable.cpp +++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace luci { diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.cpp index 8ef274268..d0fc8465d 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhere.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhere.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp index 942f804c2..f17131c94 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.cpp index 7820aca01..95b77f753 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhile.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhile.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp index bffb7869d..6ee7aba62 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp index 1cb4419db..5cd68355c 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp index 901f31b01..f58eba031 100644 --- a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp index 715042d86..795d88de3 100644 --- a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp +++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" namespace { diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp index 74c873cb2..f887bc36f 100644 --- a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp +++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "ConnectNode.test.h" diff --git a/compiler/luci/partition/src/PartitionIR.cpp b/compiler/luci/partition/src/PartitionIR.cpp index 60dc74f89..969fa7092 100644 --- a/compiler/luci/partition/src/PartitionIR.cpp +++ b/compiler/luci/partition/src/PartitionIR.cpp @@ -64,7 +64,7 @@ std::unique_ptr<PGroups> PGroups::make_copy(void) const // note: d_pgroup is now nullptr as it's moved } - return std::move(d_pgroups); + return d_pgroups; } GroupKey PGroups::group_of(luci::CircleNode *node) const diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp index 4c3971bd8..aa8a827cd 100644 --- a/compiler/luci/partition/src/PartitionMerge.cpp +++ b/compiler/luci/partition/src/PartitionMerge.cpp @@ -255,7 +255,7 @@ std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups) } } while (changed); - return std::move(d_pgroups); + return d_pgroups; } } // namespace luci diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp index eaeacf9c4..2e95f08f7 100644 --- a/compiler/luci/partition/src/PartitionPGroups.cpp +++ b/compiler/luci/partition/src/PartitionPGroups.cpp @@ -257,7 +257,7 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source, } } - return std::move(pgroups); + return pgroups; } } // namespace luci diff --git a/compiler/luci/partition/src/PartitionPModules.cpp b/compiler/luci/partition/src/PartitionPModules.cpp index beaaf6093..251dbea39 100644 --- 
a/compiler/luci/partition/src/PartitionPModules.cpp +++ b/compiler/luci/partition/src/PartitionPModules.cpp @@ -15,7 +15,7 @@ */ #include "PartitionPModules.h" -#include "ConnectNode.h" +#include "luci/ConnectNode.h" #include "luci/Service/CircleNodeClone.h" #include "luci/Log.h" @@ -156,7 +156,7 @@ std::unique_ptr<loco::Graph> clone_graph(loco::Graph *graph_org, luci::CloneCont add_graph_output(graph_clone, output_clone); } - return std::move(graph); + return graph; } void clone_recursive_subgraphs(luci::PartedModule &pm, loco::Graph *graph, diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt index 5237c6d3f..d9d004db9 100644 --- a/compiler/luci/pass/CMakeLists.txt +++ b/compiler/luci/pass/CMakeLists.txt @@ -1,9 +1,16 @@ nnas_find_package(FlatBuffers EXACT 2.0 QUIET) +nnas_find_package(Fp16Source QUIET) + if(NOT FlatBuffers_FOUND) message(STATUS "FlatBuffers NOT FOUND") return() endif(NOT FlatBuffers_FOUND) +if(NOT Fp16Source_FOUND) + message(STATUS "Fp16Source NOT FOUND") + return() +endif(NOT Fp16Source_FOUND) + file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) @@ -14,6 +21,7 @@ endif(NOT LUCI_LIBRARY_TYPE) add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_pass PRIVATE src) +target_include_directories(luci_pass PRIVATE ${Fp16Source_DIR}/include) target_include_directories(luci_pass PUBLIC include) target_link_libraries(luci_pass PUBLIC loco) target_link_libraries(luci_pass PUBLIC logo_core) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index c803898f6..b94822c35 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -47,8 +47,10 @@ public: ResolveCustomOpBatchMatMul, ResolveCustomOpMatMul, ResolveCustomOpMaxPoolWithArgmax, + ResolveCustomOpSplitV, FoldAddV2, FoldCast, + FoldDensify, 
FoldDepthwiseConv2D, FoldDequantize, FoldGather, @@ -61,6 +63,7 @@ public: ShuffleWeightTo16x1Float32, RemoveRedundantTranspose, ReplaceMulAddWithDepthwiseConv, + ReplaceNonConstFCWithBatchMatMul, ReplaceSubWithAdd, SubstitutePackToReshape, SubstitutePadV2ToPad, diff --git a/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h new file mode 100644 index 000000000..8ec81b1d4 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_FOLD_DENSIFY_PASS_H__ +#define __LUCI_FOLD_DENSIFY_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to Fold Densify if input is Sparse Constant + * + */ +struct FoldDensifyPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FoldDensifyPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FOLD_DENSIFY_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h new file mode 100644 index 000000000..2deb75297 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__ +#define __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to remove redundant dequantize operations + */ +struct RemoveRedundantDequantizePass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveRedundantDequantizePass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h new file mode 100644 index 000000000..19948a31c --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__ +#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to remove unnecessary Reshape nodes. + * @details This class will remove unnecessary pre/post-Reshape nodes. + * See https://github.com/Samsung/ONE/issues/9600 for more details. + */ +struct RemoveUnnecessaryReshapeNetPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::RemoveUnnecessaryReshapeNetPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h new file mode 100644 index 000000000..24e16ec49 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__ +#define __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to replace "FC with non-const weight" with Batched MatMul + */ +struct ReplaceNonConstFCWithBatchMatMulPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ReplaceNonConstFCWithBatchMatMulPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h new file mode 100644 index 000000000..d4f0147e8 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__ +#define __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to resolve certain custom op of subgraph into splitv op in circle schema. 
+ */ +struct ResolveCustomOpSplitVPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ResolveCustomOpSplitVPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 6dbb22d7c..74c569d20 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -20,6 +20,7 @@ #include "luci/Pass/ExpandBroadcastConstPass.h" #include "luci/Pass/FoldAddV2Pass.h" #include "luci/Pass/FoldCastPass.h" +#include "luci/Pass/FoldDensifyPass.h" #include "luci/Pass/FoldDepthwiseConv2DPass.h" #include "luci/Pass/FoldDequantizePass.h" #include "luci/Pass/FoldGatherPass.h" @@ -43,15 +44,18 @@ #include "luci/Pass/RemoveRedundantTransposePass.h" #include "luci/Pass/RemoveRedundantQuantizePass.h" #include "luci/Pass/RemoveUnnecessaryReshapePass.h" +#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h" #include "luci/Pass/RemoveUnnecessarySlicePass.h" #include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h" #include "luci/Pass/RemoveUnnecessarySplitPass.h" +#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h" #include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h" #include "luci/Pass/ReplaceSubWithAddPass.h" #include "luci/Pass/ResolveCustomOpAddPass.h" #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" #include "luci/Pass/ResolveCustomOpMatMulPass.h" #include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h" +#include "luci/Pass/ResolveCustomOpSplitVPass.h" #include "luci/Pass/SparsifyTensorPass.h" #include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" #include "luci/Pass/SubstitutePackToReshapePass.h" @@ -127,7 +131,8 @@ bool OptimizeOptionsImpl::query(Algorithm algo) return true; } -void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output) +// TODO Make a struct for args +void 
convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output, bool fuse_fc) { logo::Phase phase; @@ -135,6 +140,21 @@ void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_out phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>()); phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>()); + // Resolve custom Ops + phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>()); + phase.emplace_back(std::make_unique<luci::ResolveCustomOpBatchMatMulPass>()); + phase.emplace_back(std::make_unique<luci::ResolveCustomOpMatMulPass>()); + phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>()); + phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>()); + + // Fuse FullyConnected with Add + // Why we perform FuseAddWithFullyConnectedPass before ConvertNCHWToNHWCPass? + // FullyConnected Op's layout is not changed in ConvertNCHWToNHWCPass, while + // Add Op's layer is changed from NCHW to NHWC. + // This disables fusion of Add and FullyConnected after ConvertNCHWToNHWC. 
+ if (fuse_fc) + phase.emplace_back(std::make_unique<luci::FuseAddWithFullyConnectedPass>()); + phase.emplace_back( std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output)); @@ -190,7 +210,9 @@ void CircleOptimizer::optimize(loco::Graph *g) const bool preserve_output = _options->param(Options::AlgorithmParameters::NCHW_to_NHWC_output_shape) != "true"; - convert_nchw_to_nhwc(g, preserve_input, preserve_output); + bool fuse_fc = _options->query(Options::Algorithm::FuseAddWithFullyConnected); + + convert_nchw_to_nhwc(g, preserve_input, preserve_output, fuse_fc); } /* TRANSFORM DECLARATION BEGIN */ @@ -220,6 +242,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>()); } + if (_options->query(Options::Algorithm::ResolveCustomOpSplitV)) + { + phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>()); + } if (_options->query(Options::Algorithm::FuseInstanceNorm)) { phase.emplace_back(std::make_unique<FuseInstanceNormPass>()); @@ -260,6 +286,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::FoldCastPass>()); } + if (_options->query(Options::Algorithm::FoldDensify)) + { + phase.emplace_back(std::make_unique<luci::FoldDensifyPass>()); + } if (_options->query(Options::Algorithm::FoldDepthwiseConv2D)) { phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>()); @@ -307,6 +337,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape)) { phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapePass>()); + phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapeNetPass>()); } if (_options->query(Options::Algorithm::RemoveUnnecessarySlice)) { @@ -332,6 +363,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>()); } + if 
(_options->query(Options::Algorithm::ReplaceNonConstFCWithBatchMatMul)) + { + phase.emplace_back(std::make_unique<luci::ReplaceNonConstFCWithBatchMatMulPass>()); + } if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv)) { phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>()); diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp index ce38a90b9..9a6550b9f 100644 --- a/compiler/luci/pass/src/CircleQuantizer.cpp +++ b/compiler/luci/pass/src/CircleQuantizer.cpp @@ -22,6 +22,7 @@ #include "luci/Pass/RequantizePass.h" #include "luci/Pass/ConvertToFakeQuantizedModelPass.h" #include "luci/Pass/FoldDequantizePass.h" +#include "luci/Pass/RemoveRedundantDequantizePass.h" #include "luci/Pass/QuantizePreCheckerPass.h" #include "luci/Pass/QuantizeWithMinMaxPass.h" #include "luci/Pass/QuantizeDequantizeWeightsPass.h" @@ -252,8 +253,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"}; static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"}; static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"}; - static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"}; - static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"}; + static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "float32"}; + static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "float32"}; auto input_model_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); @@ -434,6 +435,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>()); phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>()); + // Remove redundant Dequantize Ops generated during fake quantization + 
phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>()); // Fold Dequantize Ops generated during fake quantization phase.emplace_back(std::make_unique<luci::FoldDequantizePass>()); diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp index ce4f54035..55a29d105 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp +++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp @@ -28,6 +28,69 @@ namespace { +// Return true if from can be broadcasted to to +// to's shape is [N, C, H, W] +bool broadcastable(const luci::CircleConst *from, const luci::CircleNode *to) +{ + assert(to->rank() == 4); // FIX_CALLER_UNLESS + + const auto from_rank = from->rank(); + if (from_rank > 4) + return false; + + // Scalar is always broadcastable + if (from_rank == 0) + return true; + + for (uint32_t i = 1; i <= from_rank; i++) + { + auto to_index = 4 - i; + auto from_index = from_rank - i; + + if (from->dim(from_index).value() != to->dim(to_index).value() and + from->dim(from_index).value() != 1) + return false; + } + + return true; +} + +// Expand node to rank 4 +// node should have rank less than or equal to 4 +void expand_to_rank_4(luci::CircleConst *node) +{ + auto original_rank = node->rank(); + + assert(original_rank <= 4); // FIX_CALLER_UNLESS + + if (original_rank == 4) + return; + + std::vector<uint32_t> original_shape; + for (uint32_t i = 0; i < original_rank; i++) + { + original_shape.emplace_back(node->dim(i).value()); + } + + node->rank(4); + for (uint32_t i = 0; i < (4 - original_rank); i++) + node->dim(i) = 1; + + for (uint32_t i = 0; i < original_rank; i++) + node->dim(i + (4 - original_rank)) = original_shape.at(i); +} + +bool is_output(const loco::Node *node) +{ + auto cnode = loco::must_cast<const luci::CircleNode *>(node); + auto opcode = cnode->opcode(); + if (opcode == luci::CircleOpcode::CIRCLEOUTPUT || + opcode == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE) + return true; + + return 
false; +} + bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape) { if (not node) @@ -484,7 +547,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node, // // Find MUL with an NCHW pattern described below // - Input (non-constant) shape : [N, C, H, W] -// - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1) +// - Input (constant) shape : broadcastable to [N, C, H, W] // - Output shape : [N, C, H, W] bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node, luci::CircleConst *&multiplier) @@ -511,32 +574,12 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod if (pred_node->rank() != 4) return false; - const auto const_rank = multiplier->rank(); - // Support Rank 4 or scalar (rank 0 or 1) - if (const_rank != 4 && const_rank != 0 && const_rank != 1) + if (not broadcastable(multiplier, node)) return false; - const auto input_cdim = pred_node->dim(1); - const auto output_cdim = node->dim(1); - - if (const_rank == 4) - { - bool supported_shape = false; - - // Check multiplier is (1, C, 1, 1) - if (is_same_shape(multiplier, {1, node->dim(1), 1, 1})) - supported_shape = true; - - // Check multiplier is (N, C, H, W) - if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) - supported_shape = true; + expand_to_rank_4(multiplier); - return supported_shape; - } - if (input_cdim == output_cdim) - return true; - else - return false; + return true; } // We assume ADD with const input is NCHW if, @@ -569,32 +612,12 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod if (pred_node->rank() != 4) return false; - const auto const_rank = beta->rank(); - // Support Rank 4 or scalar (rank 0 or 1) - if (const_rank != 4 && const_rank != 0 && const_rank != 1) + if (not broadcastable(beta, node)) return false; - const auto input_cdim = pred_node->dim(1); - const auto output_cdim = node->dim(1); - 
- if (const_rank == 4) - { - bool supported_shape = false; - - // Check beta is (1, C, 1, 1) - if (is_same_shape(beta, {1, node->dim(1), 1, 1})) - supported_shape = true; - - // Check beta is (N, C, H, W) - if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) - supported_shape = true; + expand_to_rank_4(beta); - return supported_shape; - } - if (input_cdim == output_cdim) - return true; - else - return false; + return true; } // We assume SUB with const input is NCHW if, @@ -675,6 +698,24 @@ template <class T> bool convert_unary_x(T *node) return true; } +template <class T> bool convert_unary_logits(T *node) +{ + const auto pred_node = loco::must_cast<luci::CircleNode *>(node->logits()); + auto pre_trans = create_pre_transpose(node); + pre_trans->a(pred_node); + node->logits(pre_trans); + + // Do shape inference for this node again. + node->shape_status(luci::ShapeStatus::UNDEFINED); + + auto post_trans = create_post_transpose(node); + loco::replace(node).with(post_trans); + + post_trans->a(node); + + return true; +} + class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool> { // Default @@ -742,17 +783,14 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool> if (is_NCHW_with_const(node, pred_node, beta)) { + assert(beta->rank() == 4); // FIX is_NCHW_with_const unless + auto nhwc_const = create_NHWC_from_NCHW(beta); + if (nhwc_const == nullptr) + return false; + node->y(nhwc_const); + auto pre_trans = create_pre_transpose(node); pre_trans->a(pred_node); - - if (beta->rank() == 4) - { - auto nhwc_const = create_NHWC_from_NCHW(beta); - if (nhwc_const == nullptr) - return false; - node->y(nhwc_const); - } - node->x(pre_trans); } else if (beta == nullptr) @@ -816,6 +854,11 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool> bool visit(luci::CircleLogistic *node) { return convert_unary_x<luci::CircleLogistic>(node); } + bool visit(luci::CircleLogSoftmax *node) + { + 
return convert_unary_logits<luci::CircleLogSoftmax>(node); + } + bool visit(luci::CircleMaximum *node) { luci::CircleNode *pred_node = nullptr; @@ -954,15 +997,15 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool> if (is_NCHW_with_const(node, pred_node, multiplier)) { + assert(multiplier->rank() == 4); // FIX is_NCHW_with_const unless + auto nhwc_const = create_NHWC_from_NCHW(multiplier); + if (nhwc_const == nullptr) + return false; + node->y(nhwc_const); + auto pre_trans = create_pre_transpose(node); pre_trans->a(pred_node); node->x(pre_trans); - - if (multiplier->rank() == 4) - { - auto nhwc_const = create_NHWC_from_NCHW(multiplier); - node->y(nhwc_const); - } } else if (multiplier == nullptr) { @@ -1049,12 +1092,127 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool> return true; } + // TODO Reduce duplicate code with CircleMean + bool visit(luci::CircleReduceMax *node) + { + auto input = loco::must_cast<luci::CircleNode *>(node->input()); + if (input->rank() != 4) + return false; + + auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices()); + if (not rindices) + return false; + + auto nhwc_rindices = create_NHWC_rindices(rindices); + if (not nhwc_rindices) + return false; + + auto pre_trans = create_pre_transpose(node); + pre_trans->a(input); + node->input(pre_trans); + + // Do shape inference for this node again. 
+ node->shape_status(luci::ShapeStatus::UNDEFINED); + + node->reduction_indices(nhwc_rindices); + + if (node->keep_dims()) + { + auto post_trans = create_post_transpose(node); + loco::replace(node).with(post_trans); + + post_trans->a(node); + + return true; + } + + // The below codes handle the cases where node->keep_dims() == false + // 1D output never needs a transpose + if (node->rank() <= 1) + return true; + + std::vector<bool> reduced_dims_nhwc(4, false); + uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>(); + + for (uint32_t ri = 0; ri < num_reduced_indices; ++ri) + { + reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true; + } + + // if channel dimension has been reduced, we don't need a transpose + if (reduced_dims_nhwc[3]) + return true; + + // likewise, if both space dimensions are reduced, no transpose is needed + if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2]) + return true; + + std::vector<int32_t> post_trans_ind; + // case 1: only N is reduced + if (num_reduced_indices == 1 && reduced_dims_nhwc[0]) + post_trans_ind = {2, 0, 1}; + + // case 2: only H or W is reduced + if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2])) + post_trans_ind = {0, 2, 1}; + + // case 3: N and either H or W are reduced + if (num_reduced_indices == 2) + post_trans_ind = {1, 0}; + + auto post_trans = create_Nd_transpose(node, post_trans_ind); + loco::replace(node).with(post_trans); + + post_trans->a(node); + + return true; + } + bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); } bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); } bool visit(luci::CircleRsqrt *node) { return convert_unary_x<luci::CircleRsqrt>(node); } + bool visit(luci::CircleSoftmax *node) { return convert_unary_logits<luci::CircleSoftmax>(node); } + + bool visit(luci::CircleSplitV *node) + { + // Change split dimension + auto axis = 
dynamic_cast<luci::CircleConst *>(node->split_dim()); + if (not axis) + return false; + + if (axis->dtype() != loco::DataType::S32) + return false; + + if (axis->size<loco::DataType::S32>() != 1) + return false; + + axis->at<loco::DataType::S32>(0) = nchw_axis_to_nhwc(axis->at<loco::DataType::S32>(0)); + + // Insert pre-transpose + const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input()); + auto pre_trans = create_pre_transpose(node); + pre_trans->a(pred_node); + node->input(pre_trans); + + // Do shape inference for this node again. + node->shape_status(luci::ShapeStatus::UNDEFINED); + + // Insert post-transposes + for (auto succ : loco::succs(node)) + { + auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ); + + auto post_trans = create_post_transpose(svo); + loco::replace(svo).with(post_trans); + post_trans->a(svo); + } + + return true; + } + bool visit(luci::CircleSquaredDifference *node) { // TODO support CircleConst input @@ -1195,6 +1353,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) // pre-Transpose --- [intermediate Ops] --- post-Transpose // | // +--[intermediate Ops] --- post-Transpose + // + // NOTE Intermediate Ops SHOULD NOT contain pre-Transpose/Reshape for (auto node : loco::postorder_traversal(loco::output_nodes(g))) { if (has_data_format(node)) @@ -1202,25 +1362,51 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) if (is_pre_transpose(node) || is_pre_reshape(node)) { + std::set<loco::Node *> intermediate; + + // Variable to check intermediate Ops contain pre-Transpose/Reshape + bool has_pre = false; + + // Variable to check the pattern is closed with post-Transpose/Reshape + bool is_closed = true; + // For recursive call of lambda - std::function<void(loco::Node *)> set_data_format_to_succs; - set_data_format_to_succs = [&](loco::Node *n) { + std::function<void(loco::Node *)> collect_intermediate; + collect_intermediate = [&](loco::Node *n) { for (auto succ : loco::succs(n)) { // Exit condition if 
(is_post_transpose(succ) || is_post_reshape(succ)) continue; - if (not has_data_format(succ)) + if (is_pre_transpose(succ) || is_pre_reshape(succ)) + { + has_pre = true; + break; + } + + if (is_output(succ)) { - set_data_format(succ, DataFormat::NHWC); + is_closed = false; + break; } - set_data_format_to_succs(succ); + intermediate.emplace(succ); + + collect_intermediate(succ); } }; - set_data_format_to_succs(node); + collect_intermediate(node); + + if (has_pre or not is_closed) + continue; + + for (auto inter : intermediate) + { + if (not has_data_format(inter)) + set_data_format(inter, DataFormat::NHWC); + } } } @@ -1248,6 +1434,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) case luci::CircleOpcode::ELU: case luci::CircleOpcode::LEAKY_RELU: case luci::CircleOpcode::LOGISTIC: + case luci::CircleOpcode::LOG_SOFTMAX: case luci::CircleOpcode::MAXIMUM: case luci::CircleOpcode::MEAN: case luci::CircleOpcode::MINIMUM: @@ -1255,9 +1442,12 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) case luci::CircleOpcode::NEG: case luci::CircleOpcode::PAD: case luci::CircleOpcode::PADV2: + case luci::CircleOpcode::REDUCE_MAX: case luci::CircleOpcode::RELU: case luci::CircleOpcode::RELU6: case luci::CircleOpcode::RSQRT: + case luci::CircleOpcode::SOFTMAX: + case luci::CircleOpcode::SPLIT_V: case luci::CircleOpcode::SQUARED_DIFFERENCE: case luci::CircleOpcode::SUB: if (!has_data_format(node)) @@ -1296,7 +1486,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g) if (circle_node->rank() != 4) { // TODO replace the check above with the input rank check, and remove the condition below - if (not dynamic_cast<luci::CircleMean *>(node)) + if (not dynamic_cast<luci::CircleMean *>(node) and + not dynamic_cast<luci::CircleReduceMax *>(node)) continue; } diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp index dd81d1380..6bb3d3268 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp +++ 
b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp @@ -16,6 +16,8 @@ #include <logo/Phase.h> +#include <luci/test/TestIOGraph.h> + #include "luci/Pass/ConvertNCHWToNHWCPass.h" #include "luci/Pass/CircleShapeInferencePass.h" @@ -23,6 +25,8 @@ #include <gtest/gtest.h> +using namespace luci::test; + namespace { @@ -202,6 +206,173 @@ public: luci::CircleConst *post_shape = nullptr; }; +/** + * Graph with pre-Reshape but no post-Transpose/Reshape. + * + * BEFORE + * [Input] + * | + * [Pre-Reshape] + * | + * [Relu] + * | + * [Output] + * + * AFTER + * [Input] + * | + * [Pre-Reshape] + * | + * [Pre-Transpose] + * | + * [Relu] + * | + * [Post-Transpose] + * | + * [Output] + */ +class NoPostReshapeGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + relu = g.nodes()->create<luci::CircleRelu>(); + pre_reshape = g.nodes()->create<luci::CircleReshape>(); + pre_shape = g.nodes()->create<luci::CircleConst>(); + + pre_shape->dtype(loco::DataType::S32); + + uint32_t channel_size = 16; + auto in = loco::must_cast<luci::CircleNode *>(input); + in->shape({1, channel_size, 4, 4}); + pre_shape->shape({4}); + + pre_shape->size<loco::DataType::S32>(4); + pre_shape->at<loco::DataType::S32>(0) = 1; + pre_shape->at<loco::DataType::S32>(1) = 4; + pre_shape->at<loco::DataType::S32>(2) = 4; + pre_shape->at<loco::DataType::S32>(3) = channel_size; + + pre_reshape->tensor(input); + pre_reshape->shape(pre_shape); + relu->features(pre_reshape); + + relu->name("Relu"); + pre_reshape->name("pre-reshape"); + + return relu; + } + +public: + luci::CircleRelu *relu = nullptr; + luci::CircleReshape *pre_reshape = nullptr; + luci::CircleConst *pre_shape = nullptr; +}; + +/** + * Graph with two pre-Reshapes + * + * BEFORE + * [Input] + * | + * [Pre-Reshape] + * | + * [Relu] + * | + * [Pre-Reshape] + * | + * [Post-Reshape] + * | + * [Output] + * + * AFTER + * [Input] + * | + * [Pre-Reshape] + * | + * [Pre-Transpose] + * | + * [Relu] + * | + * 
[Post-Transpose] + * | + * [Pre-Reshape] + * | + * [Post-Reshape] + * | + * [Output] + */ +class ReluNotClosedGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + relu = g.nodes()->create<luci::CircleRelu>(); + pre_reshape = g.nodes()->create<luci::CircleReshape>(); + pre_reshape_2 = g.nodes()->create<luci::CircleReshape>(); + post_reshape = g.nodes()->create<luci::CircleReshape>(); + pre_shape = g.nodes()->create<luci::CircleConst>(); + pre_shape_2 = g.nodes()->create<luci::CircleConst>(); + post_shape = g.nodes()->create<luci::CircleConst>(); + + pre_shape->dtype(loco::DataType::S32); + pre_shape_2->dtype(loco::DataType::S32); + post_shape->dtype(loco::DataType::S32); + + uint32_t channel_size = 16; + auto in = loco::must_cast<luci::CircleNode *>(input); + in->shape({1, channel_size, 4, 4}); + pre_shape->shape({4}); + pre_shape_2->shape({4}); + post_shape->shape({4}); + + pre_shape->size<loco::DataType::S32>(4); + pre_shape->at<loco::DataType::S32>(0) = 1; + pre_shape->at<loco::DataType::S32>(1) = 4; + pre_shape->at<loco::DataType::S32>(2) = 4; + pre_shape->at<loco::DataType::S32>(3) = channel_size; + + pre_shape_2->size<loco::DataType::S32>(4); + pre_shape_2->at<loco::DataType::S32>(0) = 1; + pre_shape_2->at<loco::DataType::S32>(1) = 4; + pre_shape_2->at<loco::DataType::S32>(2) = channel_size; + pre_shape_2->at<loco::DataType::S32>(3) = 4; + + post_shape->size<loco::DataType::S32>(4); + post_shape->at<loco::DataType::S32>(0) = 1; + post_shape->at<loco::DataType::S32>(1) = 4; + post_shape->at<loco::DataType::S32>(2) = 4; + post_shape->at<loco::DataType::S32>(3) = channel_size; + + pre_reshape->tensor(input); + pre_reshape->shape(pre_shape); + + relu->features(pre_reshape); + + pre_reshape_2->tensor(relu); + pre_reshape_2->shape(pre_shape_2); + + post_reshape->tensor(pre_reshape_2); + post_reshape->shape(post_shape); + + relu->name("Relu"); + pre_reshape->name("pre-reshape"); + 
pre_reshape->name("pre-reshape-2"); + post_reshape->name("post-reshape"); + + return post_reshape; + } + +public: + luci::CircleRelu *relu = nullptr; + luci::CircleReshape *pre_reshape = nullptr; + luci::CircleReshape *pre_reshape_2 = nullptr; + luci::CircleReshape *post_reshape = nullptr; + luci::CircleConst *pre_shape = nullptr; + luci::CircleConst *pre_shape_2 = nullptr; + luci::CircleConst *post_shape = nullptr; +}; + class AddScalarGraph final : public SimpleGraph { protected: @@ -312,6 +483,22 @@ public: luci::CircleLogistic *logistic = nullptr; }; +class LogSoftmaxGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + log_softmax = g.nodes()->create<luci::CircleLogSoftmax>(); + log_softmax->logits(input); + log_softmax->name("log_softmax"); + + return log_softmax; + } + +public: + luci::CircleLogSoftmax *log_softmax = nullptr; +}; + class MaximumGraph final : public SimpleGraph { protected: @@ -642,6 +829,51 @@ public: luci::CircleConst *const_value = nullptr; }; +class ReduceMaxGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + rm = g.nodes()->create<luci::CircleReduceMax>(); + rindices = g.nodes()->create<luci::CircleConst>(); + + rm->dtype(loco::DataType::FLOAT32); + rindices->dtype(loco::DataType::S32); + + rm->shape(_shape); + rindices->shape({static_cast<uint32_t>(_axes.size())}); + + rindices->size<loco::DataType::S32>(_axes.size()); + for (uint32_t i = 0; i < _axes.size(); ++i) + { + rindices->at<loco::DataType::S32>(i) = _axes[i]; + } + + rm->input(input); + rm->reduction_indices(rindices); + rm->keep_dims(_keep_dims); + + rm->name("reduce_max"); + rindices->name("rindices"); + + return rm; + } + +public: + void keep_dims(bool val) { _keep_dims = val; } + void axes(std::vector<int32_t> val) { _axes = val; } + void shape(std::initializer_list<uint32_t> val) { _shape = val; } + +public: + luci::CircleReduceMax *rm = nullptr; + 
luci::CircleConst *rindices = nullptr; + +private: + bool _keep_dims = true; + std::vector<int32_t> _axes = {2, 3}; + std::initializer_list<uint32_t> _shape = {1, 16, 1, 1}; +}; + class ReluGraph final : public SimpleGraph { protected: @@ -690,6 +922,111 @@ public: luci::CircleRsqrt *rsqrt = nullptr; }; +class SoftmaxGraph final : public SimpleGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + softmax = g.nodes()->create<luci::CircleSoftmax>(); + softmax->logits(input); + softmax->name("softmax"); + + return softmax; + } + +public: + luci::CircleSoftmax *softmax = nullptr; +}; + +class SplitVGraphlet +{ +public: + SplitVGraphlet() = default; + +public: + void init(loco::Graph *g) + { + // CircleCustom(SplitV) + _splitv = g->nodes()->create<luci::CircleSplitV>(); + _splitv->shape({1, 2, 2, 192}); + _splitv->dtype(loco::DataType::FLOAT32); + _splitv->name("splitv"); + + // CircleConst + auto size_splits = g->nodes()->create<luci::CircleConst>(); + size_splits->dtype(loco::DataType::S32); + size_splits->shape({3}); + size_splits->size<loco::DataType::S32>(3); + size_splits->at<loco::DataType::S32>(0) = 32; + size_splits->at<loco::DataType::S32>(1) = 32; + size_splits->at<loco::DataType::S32>(2) = 128; + + // CircleConst + auto split_dim = g->nodes()->create<luci::CircleConst>(); + split_dim->dtype(loco::DataType::S32); + split_dim->rank(0); + split_dim->size<loco::DataType::S32>(1); + split_dim->scalar<loco::DataType::S32>() = 3; + + _splitv->size_splits(size_splits); + _splitv->split_dim(split_dim); + _splitv->num_split(3); + + // CircleSplitVOut + _splitv_out1 = g->nodes()->create<luci::CircleSplitVOut>(); + _splitv_out1->shape({1, 2, 2, 32}); + _splitv_out1->dtype(loco::DataType::FLOAT32); + _splitv_out1->index(0); + _splitv_out1->input(_splitv); + _splitv_out1->name("splitv_out1"); + + // CircleSplitVOut + _splitv_out2 = g->nodes()->create<luci::CircleSplitVOut>(); + _splitv_out2->shape({1, 2, 2, 32}); + 
_splitv_out2->dtype(loco::DataType::FLOAT32); + _splitv_out2->index(1); + _splitv_out2->input(_splitv); + _splitv_out2->name("splitv_out2"); + + // CircleSplitVOut + _splitv_out3 = g->nodes()->create<luci::CircleSplitVOut>(); + _splitv_out3->shape({1, 2, 2, 128}); + _splitv_out3->dtype(loco::DataType::FLOAT32); + _splitv_out3->index(2); + _splitv_out3->input(_splitv); + _splitv_out3->name("splitv_out3"); + } + +public: + luci::CircleSplitV *splitv() { return _splitv; } + +protected: + luci::CircleSplitV *_splitv = nullptr; + luci::CircleSplitVOut *_splitv_out1 = nullptr; + luci::CircleSplitVOut *_splitv_out2 = nullptr; + luci::CircleSplitVOut *_splitv_out3 = nullptr; +}; + +class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet +{ +public: + SplitVGraph() = default; + + void init(void) + { + TestIGraphlet::init(g(), {1, 2, 2, 192}); + TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}}); + SplitVGraphlet::init(g()); + + // connect graph + _splitv->input(input()); + + output(0)->from(_splitv_out1); + output(1)->from(_splitv_out2); + output(2)->from(_splitv_out3); + } +}; + class SquaredDifferenceGraph final : public SimpleGraph { protected: @@ -929,8 +1266,11 @@ TEST(ConvertNCHWToNHWC, AddScalar) auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y()); EXPECT_NE(nullptr, new_beta); - EXPECT_EQ(1, new_beta->rank()); + EXPECT_EQ(4, new_beta->rank()); EXPECT_EQ(1, new_beta->dim(0).value()); + EXPECT_EQ(1, new_beta->dim(1).value()); + EXPECT_EQ(1, new_beta->dim(2).value()); + EXPECT_EQ(1, new_beta->dim(3).value()); check_pre_trans(g.output->from()); } @@ -1017,6 +1357,26 @@ TEST(ConvertNCHWToNHWC, Logistic) EXPECT_EQ(16, g.logistic->dim(3).value()); } +TEST(ConvertNCHWToNHWC, LogSoftmax) +{ + LogSoftmaxGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.log_softmax->logits()); + + auto log_softmax_succs = loco::succs(g.log_softmax); + EXPECT_EQ(1, log_softmax_succs.size()); + 
check_post_trans(*log_softmax_succs.begin()); + + // Check log_softmax shape + EXPECT_EQ(1, g.log_softmax->dim(0).value()); + EXPECT_EQ(4, g.log_softmax->dim(1).value()); + EXPECT_EQ(4, g.log_softmax->dim(2).value()); + EXPECT_EQ(16, g.log_softmax->dim(3).value()); +} + TEST(ConvertNCHWToNHWC, Maximum) { MaximumGraph g; @@ -1265,8 +1625,11 @@ TEST(ConvertNCHWToNHWC, MulScalar) auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y()); EXPECT_NE(nullptr, new_multiplier); - EXPECT_EQ(1, new_multiplier->rank()); + EXPECT_EQ(4, new_multiplier->rank()); EXPECT_EQ(1, new_multiplier->dim(0).value()); + EXPECT_EQ(1, new_multiplier->dim(1).value()); + EXPECT_EQ(1, new_multiplier->dim(2).value()); + EXPECT_EQ(1, new_multiplier->dim(3).value()); check_pre_trans(g.output->from()); } @@ -1451,6 +1814,85 @@ TEST(ConvertNCHWToNHWC, Preserve_Input_Output) } } +TEST(ConvertNCHWToNHWC, ReduceMax) +{ + ReduceMaxGraph g; + g.init(); + + run_phase(&g.g, false, false); + + check_pre_trans(g.rm->input()); + + auto rm_succs = loco::succs(g.rm); + EXPECT_EQ(1, rm_succs.size()); + check_post_trans(*rm_succs.begin()); + + auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices()); + EXPECT_NE(nullptr, new_rindices); + EXPECT_EQ(1, new_rindices->rank()); + EXPECT_EQ(2, new_rindices->dim(0).value()); + EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>()); + EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0)); + EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1)); +} + +TEST(ConvertNCHWToNHWC, ReduceMax_keep_dims_false) +{ + struct TC + { + std::vector<int32_t> nchw_ind; + std::vector<int32_t> nhwc_ind; + std::initializer_list<uint32_t> shape; + bool needs_transpose = false; + }; + + uint32_t n = 1; + uint32_t c = 16; + uint32_t h = 4; + uint32_t w = 4; + + std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false}, + {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true}, + {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, 
{c, w}, true}, + {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false}, + {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false}, + {{0, 1, 2}, {0, 3, 1}, {w}, false}}; + + for (auto &tc : test_cases) + { + ReduceMaxGraph g; + g.keep_dims(false); + g.axes(tc.nchw_ind); + g.shape(tc.shape); + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.rm->input()); + + auto rm_succs = loco::succs(g.rm); + EXPECT_EQ(1, rm_succs.size()); + if (tc.needs_transpose) + { + EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin())); + } + else + { + EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin())); + } + + auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices()); + EXPECT_NE(nullptr, new_rindices); + EXPECT_EQ(1, new_rindices->rank()); + EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value()); + EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>()); + for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i) + { + EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i)); + } + } +} + TEST(ConvertNCHWToNHWC, Relu) { ReluGraph g; @@ -1511,6 +1953,57 @@ TEST(ConvertNCHWToNHWC, Rsqrt) EXPECT_EQ(16, g.rsqrt->dim(3).value()); } +TEST(ConvertNCHWToNHWC, Softmax) +{ + SoftmaxGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.softmax->logits()); + + auto softmax_succs = loco::succs(g.softmax); + EXPECT_EQ(1, softmax_succs.size()); + check_post_trans(*softmax_succs.begin()); + + // Check softmax shape + EXPECT_EQ(1, g.softmax->dim(0).value()); + EXPECT_EQ(4, g.softmax->dim(1).value()); + EXPECT_EQ(4, g.softmax->dim(2).value()); + EXPECT_EQ(16, g.softmax->dim(3).value()); +} + +TEST(ConvertNCHWToNHWC, SplitV) +{ + SplitVGraph g; + g.init(); + + run_phase(g.g(), true, true); + + check_pre_trans(g.splitv()->input()); + + auto splitv_succs = loco::succs(g.splitv()); + for (auto svo : loco::succs(g.splitv())) + { + for (auto succ : 
loco::succs(svo)) + { + check_post_trans(succ); + } + } + + // Check splitv() shape + EXPECT_EQ(1, g.splitv()->dim(0).value()); + EXPECT_EQ(2, g.splitv()->dim(1).value()); + EXPECT_EQ(192, g.splitv()->dim(2).value()); + EXPECT_EQ(2, g.splitv()->dim(3).value()); + + // Check axis + auto axis = dynamic_cast<luci::CircleConst *>(g.splitv()->split_dim()); + EXPECT_NE(nullptr, axis); + EXPECT_EQ(1, axis->size<loco::DataType::S32>()); + EXPECT_EQ(2, axis->at<loco::DataType::S32>(0)); +} + TEST(ConvertNCHWToNHWC, SquaredDifference) { SquaredDifferenceGraph g; @@ -1602,3 +2095,31 @@ TEST(ConvertNCHWToNHWC, SubScalar) check_pre_trans(g.output->from()); } + +TEST(ConvertNCHWToNHWC, Not_Closed_Case1_NEG) +{ + NoPostReshapeGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.relu->features()); + + auto relu_succs = loco::succs(g.relu); + EXPECT_EQ(1, relu_succs.size()); + check_post_trans(*relu_succs.begin()); +} + +TEST(ConvertNCHWToNHWC, Not_Closed_Case2_NEG) +{ + ReluNotClosedGraph g; + g.init(); + + run_phase(&g.g, true, true); + + check_pre_trans(g.relu->features()); + + auto relu_succs = loco::succs(g.relu); + EXPECT_EQ(1, relu_succs.size()); + check_post_trans(*relu_succs.begin()); +} diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp index 11970fff5..72f590135 100644 --- a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp +++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp @@ -184,8 +184,63 @@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void> // For non-const activation, insert Quantize-Dequantize Ops // and dequantize the node - void visit(luci::CircleConv2D *node) { fq_activation(node); } void visit(luci::CircleAdd *node) { fq_activation(node); } + void visit(luci::CircleAveragePool2D *node) { fq_activation(node); } + void visit(luci::CircleBatchMatMul *node) { fq_activation(node); } + void visit(luci::CircleConv2D 
*node) { fq_activation(node); } + void visit(luci::CircleDepthwiseConv2D *node) { fq_activation(node); } + void visit(luci::CircleDiv *node) { fq_activation(node); } + void visit(luci::CircleFullyConnected *node) { fq_activation(node); } + void visit(luci::CircleInstanceNorm *node) { fq_activation(node); } + void visit(luci::CircleLeakyRelu *node) { fq_activation(node); } + void visit(luci::CircleLogistic *node) { fq_activation(node); } + void visit(luci::CircleLogSoftmax *node) { fq_activation(node); } + void visit(luci::CircleMaxPool2D *node) { fq_activation(node); } + void visit(luci::CircleMul *node) { fq_activation(node); } + void visit(luci::CircleNeg *node) { fq_activation(node); } + void visit(luci::CirclePad *node) { fq_activation(node); } + void visit(luci::CirclePRelu *node) { fq_activation(node); } + void visit(luci::CircleMean *node) { fq_activation(node); } + void visit(luci::CircleReduceMax *node) { fq_activation(node); } + void visit(luci::CircleRelu *node) { fq_activation(node); } + void visit(luci::CircleRelu6 *node) { fq_activation(node); } + void visit(luci::CircleResizeBilinear *node) { fq_activation(node); } + void visit(luci::CircleResizeNearestNeighbor *node) { fq_activation(node); } + void visit(luci::CircleRsqrt *node) { fq_activation(node); } + void visit(luci::CircleSoftmax *node) { fq_activation(node); } + void visit(luci::CircleSqrt *node) { fq_activation(node); } + void visit(luci::CircleTanh *node) { fq_activation(node); } + void visit(luci::CircleTransposeConv *node) { fq_activation(node); } + + // For Ops that do not change the value of input, do nothing + // (dtype will be automatically updated by type inference) + void visit(luci::CircleCast *) {} + void visit(luci::CircleConcatenation *) {} + void visit(luci::CircleGather *) {} + void visit(luci::CircleSlice *) {} + void visit(luci::CircleStridedSlice *) {} + void visit(luci::CircleReshape *) {} + void visit(luci::CircleSplit *) {} + void visit(luci::CircleSplitOut *) {} + void 
visit(luci::CircleSplitV *) {} + void visit(luci::CircleSplitVOut *) {} + void visit(luci::CircleTranspose *) {} + + // For Ops that return index, fake quantization is unnecessary + void visit(luci::CircleArgMax *) {} + + // Virtual node + void visit(luci::CircleOutputExclude *) {} + + void visit(luci::CircleQuantize *node) + { + RETURN_UNLESS(is_quant_act(node)); + + insert_dequantize(node); + } + + // Dequantize Op does nothing in fp32 model + void visit(luci::CircleDequantize *) {} }; #undef RETURN_UNLESS diff --git a/compiler/luci/pass/src/FoldDensifyPass.cpp b/compiler/luci/pass/src/FoldDensifyPass.cpp new file mode 100644 index 000000000..5ddc743e5 --- /dev/null +++ b/compiler/luci/pass/src/FoldDensifyPass.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FoldDensifyPass.h" +#include "helpers/SparsityFormatConverter.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Profile/CircleNodeOrigin.h> + +#include <cassert> +#include <vector> + +namespace +{ + +bool is_foldable_const(luci::CircleConst *node) +{ + if (node->sparsityparam() == nullptr) + return false; + + if (node->dtype() == loco::DataType::FLOAT32) + return true; + if (node->dtype() == loco::DataType::FLOAT16) + return true; + + return false; +} + +luci::CircleConst *densified_const_node(luci::CircleConst *const_node) +{ + assert(const_node->sparsityparam()); + + auto name = const_node->name(); + assert(name.length() > 0); + auto g = const_node->graph(); + auto new_const_node = g->nodes()->create<luci::CircleConst>(); + + new_const_node->dtype(const_node->dtype()); + new_const_node->rank(const_node->rank()); + + uint32_t dim_size = 1; + std::vector<int> dense_shape; + for (uint32_t i = 0; i < new_const_node->rank(); ++i) + { + assert(const_node->dim(i).known()); + new_const_node->dim(i) = const_node->dim(i); + + uint32_t value = const_node->dim(i).value(); + dim_size *= value; + dense_shape.emplace_back(static_cast<int32_t>(value)); + } + + if (const_node->dtype() == loco::DataType::FLOAT32) + new_const_node->size<loco::DataType::FLOAT32>(dim_size); + else + { + assert(const_node->dtype() == loco::DataType::FLOAT16); + new_const_node->size<loco::DataType::FLOAT16>(dim_size); + } + + new_const_node->shape_status(luci::ShapeStatus::VALID); + new_const_node->name(name + "_DS"); + + if (const_node->dtype() == loco::DataType::FLOAT32) + { + auto const_items = const_node->size<loco::DataType::FLOAT32>(); + auto f_data = std::make_unique<float[]>(const_items); + for (size_t i = 0; i < const_items; ++i) + f_data[i] = const_node->at<loco::DataType::FLOAT32>(i); + + sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam()); + sparsity::FormatConverter<float> converter(dense_shape, sp); + 
converter.SparseToDense(f_data.get()); + const auto &data_dense = converter.GetData(); + assert(data_dense.size() == dim_size); + + for (uint32_t i = 0; i < dim_size; ++i) + new_const_node->at<loco::DataType::FLOAT32>(i) = data_dense[i]; + + luci::freeTfLiteSparsity(sp); + } + else + { + assert(const_node->dtype() == loco::DataType::FLOAT16); + + auto const_items = const_node->size<loco::DataType::FLOAT16>(); + auto f_data = std::make_unique<uint16_t[]>(const_items); + for (size_t i = 0; i < const_items; ++i) + f_data[i] = const_node->at<loco::DataType::FLOAT16>(i); + + // Primitive type for FLOAT16 is UINT16 + sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam()); + sparsity::FormatConverter<uint16_t> converter(dense_shape, sp); + converter.SparseToDense(f_data.get()); + const auto &data_dense = converter.GetData(); + assert(data_dense.size() == dim_size); + for (uint32_t i = 0; i < dim_size; ++i) + new_const_node->at<loco::DataType::FLOAT16>(i) = data_dense[i]; + + luci::freeTfLiteSparsity(sp); + } + + return new_const_node; +} + +/** + * @brief Fold Densify if input is Sparse Constant + */ +bool fold_densify(luci::CircleDensify *densify) +{ + auto const_input = dynamic_cast<luci::CircleConst *>(densify->input()); + if (not const_input) + return false; + + if (not is_foldable_const(const_input)) + return false; + + auto dense_const = densified_const_node(const_input); + assert(dense_const); + + loco::replace(densify).with(dense_const); + luci::add_origin(dense_const, luci::composite_origin( + {luci::get_origin(densify), luci::get_origin(const_input)})); + + return true; +} + +} // namespace + +namespace luci +{ + +/** + * BEFORE + * + * [CircleConst](sparse) + * | + * [CircleDensify] + * | + * [CircleNode] + * | + * + * AFTER + * + * [CircleConst](dense) [CircleConst](sparse) + * | | + * [CircleNode] [CircleDensify] + * | + */ +bool FoldDensifyPass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : 
loco::active_nodes(loco::output_nodes(g))) + { + if (auto densify = dynamic_cast<luci::CircleDensify *>(node)) + { + if (fold_densify(densify)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FoldDensifyPass.test.cpp b/compiler/luci/pass/src/FoldDensifyPass.test.cpp new file mode 100644 index 000000000..2f9736f49 --- /dev/null +++ b/compiler/luci/pass/src/FoldDensifyPass.test.cpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FoldDensifyPass.h" +#include "PassTestGraphs.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +class FoldDensifyPassGraph : public luci::ConstantFoldingAddTestGraph +{ +public: + FoldDensifyPassGraph(std::initializer_list<uint32_t> shape) + : luci::ConstantFoldingAddTestGraph(shape, loco::DataType::FLOAT32) + { + _densify = _g.nodes()->create<luci::CircleDensify>(); + _x = _g.nodes()->create<luci::CircleConst>(); + + _densify->dtype(loco::DataType::FLOAT32); + _x->dtype(loco::DataType::FLOAT32); + + _densify->shape(shape); + _x->shape(shape); + + _densify->input(_x); + + _densify->name("densify"); + _x->name("x"); + } + + loco::Node *createFoldedPattern() override { return _densify; } + +public: + void fill_const_dense(void) + { + uint32_t num_elems = 1; + for (uint32_t r = 0; r < _x->rank(); ++r) + num_elems *= _x->dim(r).value(); + + _x->size<loco::DataType::FLOAT32>(num_elems); + for (uint32_t i = 0; i < num_elems; i++) + _x->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i + 1); + } + + void fill_const_sparse(void) + { + // fill 4x4 of + // [[1 0 0 0] + // [0 2 0 0] + // [0 0 3 0] + // [0 0 0 4]] + + // values of 1.0, 2.0, 3.0, 4.0 + uint32_t udata[] = {0x3f800000, 0x40000000, 0x40400000, 0x40800000}; + float *fdata = reinterpret_cast<float *>(udata); + + _x->size<loco::DataType::FLOAT32>(4); + for (uint32_t i = 0; i < 4; i++) + _x->at<loco::DataType::FLOAT32>(i) = fdata[i]; + + auto sparsityparam = std::make_unique<luci::SparsityParam>(); + sparsityparam->traversal_order = std::vector<int32_t>({0, 1}); + sparsityparam->block_map = std::vector<int32_t>({}); + + auto dm0 = luci::DimMetaData(luci::DimensionType::DENSE, 4); + + std::vector<int32_t> as_vec = {0, 1, 2, 3, 4}; + std::vector<int32_t> ai_vec = {0, 1, 2, 3}; + auto as = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, as_vec); + auto ai = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, ai_vec); + auto dm1 = 
luci::DimMetaData(luci::DimensionType::SPARSE_CSR, 0, as, ai); + sparsityparam->dim_metadata.emplace_back(dm0); + sparsityparam->dim_metadata.emplace_back(dm1); + + _x->sparsityparam(std::move(sparsityparam)); + } + +protected: + luci::CircleDensify *_densify = nullptr; + luci::CircleConst *_x = nullptr; +}; + +class FoldDensifyPassGraphTest : public FoldDensifyPassGraph, public ::testing::Test +{ +public: + FoldDensifyPassGraphTest() : FoldDensifyPassGraph({4, 4}) {} + + virtual void SetUp() { init(); } +}; + +} // namespace + +TEST(FoldDensifyPassGraph, name) +{ + luci::FoldDensifyPass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST_F(FoldDensifyPassGraphTest, no_sparsity_param_NEG) +{ + fill_const_dense(); + + luci::FoldDensifyPass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(FoldDensifyPassGraphTest, sparsity_param) +{ + fill_const_sparse(); + + luci::FoldDensifyPass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + EXPECT_EQ(2, folded_const->rank()); + EXPECT_EQ(4, folded_const->dim(0).value()); + EXPECT_EQ(4, folded_const->dim(1).value()); + EXPECT_EQ(16, folded_const->size<loco::DataType::FLOAT32>()); + for (int y = 0; y < 4; ++y) + { + for (int x = 0; x < 4; ++x) + { + float ovalue = folded_const->at<loco::DataType::FLOAT32>(y * 4 + x); + float fvalue = 0.0; + if (x == y) + { + // diagonal position + fvalue = static_cast<float>(y + 1); + } + EXPECT_EQ(fvalue, ovalue); + } + } +} diff --git a/compiler/luci/pass/src/FoldDequantizePass.cpp b/compiler/luci/pass/src/FoldDequantizePass.cpp index 3dd4f8cea..b6526deb0 100644 --- a/compiler/luci/pass/src/FoldDequantizePass.cpp +++ b/compiler/luci/pass/src/FoldDequantizePass.cpp @@ -19,6 +19,8 @@ #include <luci/IR/CircleNodes.h> #include <luci/Profile/CircleNodeOrigin.h> +#include <fp16.h> + namespace { @@ -32,6 +34,9 @@ bool 
is_hybrid_kernel_supported(loco::Node *node) bool is_foldable_const(luci::CircleConst *node) { + if (node->dtype() == loco::DataType::FLOAT16) + return true; + if (node->quantparam() == nullptr) return false; @@ -39,17 +44,18 @@ bool is_foldable_const(luci::CircleConst *node) return true; if (node->dtype() == loco::DataType::U8) return true; + if (node->dtype() == loco::DataType::S16) + return true; + if (node->dtype() == loco::DataType::S32) + return true; + if (node->dtype() == loco::DataType::S64) + return true; return false; } luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node) { - if (const_node->quantparam() == nullptr) - { - throw std::runtime_error("Given constant node has no quantization parameter"); - } - auto name = const_node->name(); assert(name.length() > 0); auto g = const_node->graph(); @@ -67,38 +73,70 @@ luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node) new_const_node->shape_status(luci::ShapeStatus::VALID); new_const_node->name(name + "_DQ"); + if (const_node->dtype() == loco::DataType::FLOAT16) + { + for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i) + { + auto raw = const_node->at<loco::DataType::FLOAT16>(i); + new_const_node->at<loco::DataType::FLOAT32>(i) = fp16_ieee_to_fp32_value(raw); + } + return new_const_node; + } + + if (const_node->quantparam() == nullptr) + { + throw std::runtime_error("Given constant node has no quantization parameter"); + } + const int32_t q_dim = const_node->quantparam()->quantized_dimension; - const int32_t q_dim_value = const_node->dim(q_dim).value(); + // For scalar, q_dim_value is 1 + // For non-scalar, q_dim_value is the size of quantized dimension + const int32_t q_dim_value = const_node->rank() == 0 ? 
1 : const_node->dim(q_dim).value(); int32_t right_count = q_dim_value; for (uint32_t i = q_dim + 1; i < const_node->rank(); ++i) right_count *= const_node->dim(i).value(); - if (const_node->dtype() == loco::DataType::S8) + for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i) { - for (uint32_t i = 0; i < const_node->size<loco::DataType::S8>(); ++i) - { - uint32_t qd = (i % right_count) / (right_count / q_dim_value); - if (qd >= const_node->quantparam()->zerop.size()) - qd = 0; + uint32_t qd = (i % right_count) / (right_count / q_dim_value); + if (qd >= const_node->quantparam()->zerop.size()) + qd = 0; - new_const_node->at<loco::DataType::FLOAT32>(i) = - (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) * - const_node->quantparam()->scale.at(qd); - } - } - else - { - for (uint32_t i = 0; i < const_node->size<loco::DataType::U8>(); ++i) + switch (const_node->dtype()) { - uint32_t qd = (i % right_count) / (right_count / q_dim_value); - if (qd >= const_node->quantparam()->zerop.size()) - qd = 0; - - new_const_node->at<loco::DataType::FLOAT32>(i) = - (float)((int)const_node->at<loco::DataType::U8>(i) - - const_node->quantparam()->zerop.at(qd)) * - const_node->quantparam()->scale.at(qd); + case loco::DataType::S8: + new_const_node->at<loco::DataType::FLOAT32>(i) = + static_cast<float>(const_node->at<loco::DataType::S8>(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::S16: + new_const_node->at<loco::DataType::FLOAT32>(i) = + static_cast<float>(const_node->at<loco::DataType::S16>(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::S32: + new_const_node->at<loco::DataType::FLOAT32>(i) = + static_cast<float>(const_node->at<loco::DataType::S32>(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::S64: + 
new_const_node->at<loco::DataType::FLOAT32>(i) = + static_cast<float>(const_node->at<loco::DataType::S64>(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + case loco::DataType::U8: + new_const_node->at<loco::DataType::FLOAT32>(i) = + static_cast<float>(const_node->at<loco::DataType::U8>(i) - + const_node->quantparam()->zerop.at(qd)) * + const_node->quantparam()->scale.at(qd); + break; + default: + throw std::runtime_error("Not supported dtype for FoldDequantizePass"); } } @@ -160,7 +198,7 @@ bool FoldDequantizePass::run(loco::Graph *g) { bool changed = false; - for (auto node : loco::all_nodes(g)) + for (auto node : loco::active_nodes(loco::output_nodes(g))) { if (auto circle_dequant = dynamic_cast<luci::CircleDequantize *>(node)) { diff --git a/compiler/luci/pass/src/FoldDequantizePass.test.cpp b/compiler/luci/pass/src/FoldDequantizePass.test.cpp index d82a7bc87..fb5b6adc0 100644 --- a/compiler/luci/pass/src/FoldDequantizePass.test.cpp +++ b/compiler/luci/pass/src/FoldDequantizePass.test.cpp @@ -15,12 +15,389 @@ */ #include "luci/Pass/FoldDequantizePass.h" +#include "PassTestGraphs.h" #include <gtest/gtest.h> +namespace +{ + +template <loco::DataType DT> +class FoldDequantizeTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test +{ +public: + FoldDequantizeTest() : luci::ConstantFoldingAddTestGraph({2, 2, 2}, DT) {} + + virtual void SetUp() { init(); } + + loco::Node *createFoldedPattern() override + { + _dequantize = _g.nodes()->create<luci::CircleDequantize>(); + _input = _g.nodes()->create<luci::CircleConst>(); + + _dequantize->dtype(loco::DataType::FLOAT32); + _input->dtype(DT); + + _input->shape({2, 2, 2}); + + _input->size<DT>(8); + _input->at<DT>(0) = 0; + _input->at<DT>(1) = 1; + _input->at<DT>(2) = 2; + _input->at<DT>(3) = 3; + _input->at<DT>(4) = 4; + _input->at<DT>(5) = 5; + _input->at<DT>(6) = 6; + _input->at<DT>(7) = 7; + + auto qparam = std::make_unique<luci::CircleQuantParam>(); + 
qparam->quantized_dimension = 1; + qparam->scale.push_back(5.0); + qparam->scale.push_back(10.0); + qparam->zerop.push_back(1); + qparam->zerop.push_back(2); + _input->quantparam(std::move(qparam)); + + _dequantize->input(_input); + + _dequantize->name("dequantize"); + _input->name("input"); + + return _dequantize; + } + + void createScalarPattern() + { + _input->rank(0); + _input->size<DT>(1); + _input->at<DT>(0) = 1; + + auto qparam = std::make_unique<luci::CircleQuantParam>(); + qparam->quantized_dimension = 0; + qparam->scale.push_back(1.0); + qparam->zerop.push_back(0); + _input->quantparam(std::move(qparam)); + } + + void createNotFoldablePattern() { _input->quantparam(nullptr); } + +protected: + luci::CircleDequantize *_dequantize = nullptr; + luci::CircleConst *_input = nullptr; +}; + +class S8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S8> +{ +}; + +class S16FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S16> +{ +}; + +class S32FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S32> +{ +}; + +class S64FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S64> +{ +}; + +class U8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::U8> +{ +}; + +class F16FoldDequantizeTest : public luci::ConstantFoldingTestGraph, public ::testing::Test +{ +public: + F16FoldDequantizeTest() : ConstantFoldingTestGraph({2, 2}, loco::DataType::FLOAT16) {} + + virtual void SetUp() { init(); } + + loco::Node *createFoldedPattern() override + { + const auto DT = loco::DataType::FLOAT16; + _dequantize = _g.nodes()->create<luci::CircleDequantize>(); + _f16const = _g.nodes()->create<luci::CircleConst>(); + + _dequantize->dtype(loco::DataType::FLOAT32); + _f16const->dtype(DT); + + _f16const->shape({2, 2}); + + _f16const->size<loco::DataType::FLOAT16>(4); + _f16const->at<DT>(0) = 49408; // -2.5f + _f16const->at<DT>(1) = 47104; // -0.5f + _f16const->at<DT>(2) = 0; // 0.0f + _f16const->at<DT>(3) = 15872; // 1.5f + // NOTE 
how to get uint16_t value of float16 ? + // Use compiler/souschef/src/Gaussian.cpp GaussianFloat16DataChef::generate() + // uint16_t value = fp16_ieee_from_fp32_value(-2.5); + // printf("-2.5 = %u\r\n", value); + + _dequantize->input(_f16const); + + _dequantize->name("dequantize"); + _f16const->name("input"); + + _output->from(_dequantize); + + return _dequantize; + } + + void createNotFoldablePattern() { _dequantize->input(_input); } + +protected: + luci::CircleConst *getFoldedPattern() override + { + return dynamic_cast<luci::CircleConst *>(_output->from()); + } + + void init() override { createFoldedPattern(); } + +protected: + luci::CircleDequantize *_dequantize = nullptr; + luci::CircleConst *_f16const = nullptr; +}; + +} // namespace + TEST(FoldDequantizePassTest, name) { luci::FoldDequantizePass pass; auto const name = pass.name(); ASSERT_NE(nullptr, name); } + +TEST_F(U8FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2)); + EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3)); + EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4)); + EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5)); + EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6)); + EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7)); +} + +TEST_F(U8FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + 
luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S8FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2)); + EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3)); + EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4)); + EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5)); + EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6)); + EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7)); +} + +TEST_F(S8FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S16FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0)); + EXPECT_EQ(0.0, 
folded_const->at<loco::DataType::FLOAT32>(1)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2)); + EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3)); + EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4)); + EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5)); + EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6)); + EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7)); +} + +TEST_F(S16FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S32FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2)); + EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3)); + EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4)); + EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5)); + EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6)); + EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7)); +} + +TEST_F(S32FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(S64FoldDequantizeTest, fold_dequant_basic) +{ + 
luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(3, folded_const->rank()); + EXPECT_EQ(2, folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(2, folded_const->dim(2).value()); + EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2)); + EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3)); + EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4)); + EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5)); + EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6)); + EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7)); +} + +TEST_F(S64FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} + +TEST_F(U8FoldDequantizeTest, fold_dequant_scalar) +{ + createScalarPattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(0, folded_const->rank()); + EXPECT_EQ(1.0, folded_const->at<loco::DataType::FLOAT32>(0)); +} + +TEST_F(F16FoldDequantizeTest, fold_dequant_basic) +{ + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_NE(nullptr, folded_const); + + // Check type, shape, values of folded const + EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype()); + EXPECT_EQ(2, folded_const->rank()); + EXPECT_EQ(2, 
folded_const->dim(0).value()); + EXPECT_EQ(2, folded_const->dim(1).value()); + EXPECT_EQ(-2.5, folded_const->at<loco::DataType::FLOAT32>(0)); + EXPECT_EQ(-0.5, folded_const->at<loco::DataType::FLOAT32>(1)); + EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2)); + EXPECT_EQ(1.5, folded_const->at<loco::DataType::FLOAT32>(3)); +} + +TEST_F(F16FoldDequantizeTest, fold_dequant_basic_NEG) +{ + createNotFoldablePattern(); + + luci::FoldDequantizePass pass; + while (pass.run(graph())) + ; + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(nullptr, folded_const); +} diff --git a/compiler/luci/pass/src/FoldSparseToDensePass.cpp b/compiler/luci/pass/src/FoldSparseToDensePass.cpp index 0c6fc43ed..ed60d8899 100644 --- a/compiler/luci/pass/src/FoldSparseToDensePass.cpp +++ b/compiler/luci/pass/src/FoldSparseToDensePass.cpp @@ -19,6 +19,8 @@ #include <luci/IR/CircleNodes.h> +#include <limits> + namespace { diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp index 2c990f0a5..bc09abee2 100644 --- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp +++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp @@ -22,6 +22,7 @@ #include <luci/Profile/CircleNodeOrigin.h> #include <luci/Service/CircleShapeInference.h> #include <luci/Service/Nodes/CircleConst.h> +#include <luci/Service/CircleNodeClone.h> namespace { @@ -55,6 +56,26 @@ void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape) new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r); } +luci::CircleReshape *create_cloned_reshape(luci::CircleReshape *reshape) +{ + assert(reshape != nullptr); // FIX_CALLER_UNLESS + + luci::CircleConst *cloned_shape = clone_shape(reshape); + if (cloned_shape == nullptr) + return nullptr; + + auto cloned_node = luci::clone_node(reshape, reshape->graph()); + if (cloned_node == nullptr) + return nullptr; + + auto new_reshape = loco::must_cast<luci::CircleReshape *>(cloned_node); 
+ new_reshape->shape(cloned_shape); + new_reshape->name(reshape->name() + "_C"); + luci::add_origin(new_reshape, luci::get_origin(reshape)); + + return new_reshape; +} + bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg) { assert(reshape != nullptr); @@ -85,6 +106,26 @@ bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg) return true; } +bool forward_reshape(luci::CircleReshape *reshape, luci::CircleLogistic *logit) +{ + assert(reshape != nullptr); // FIX_CALLER_UNLESS + assert(logit != nullptr); // FIX_CALLER_UNLESS + + auto new_reshape = create_cloned_reshape(reshape); + if (not new_reshape) + return false; + + // reconnect network + loco::replace(logit).with(new_reshape); + logit->x(reshape->tensor()); + new_reshape->tensor(logit); + + // Do shape inference for this node again. + logit->shape_status(luci::ShapeStatus::UNDEFINED); + + return true; +} + class ForwardReshape final : public luci::CircleNodeMutableVisitor<bool> { protected: @@ -103,6 +144,14 @@ protected: return forward_reshape(reshape, node); } + bool visit(luci::CircleLogistic *node) + { + auto reshape = as_reshape(node->x()); + if (reshape == nullptr) + return false; + + return forward_reshape(reshape, node); + } // TODO add more unary operators }; diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp index 2593a014c..373513270 100644 --- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp +++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp @@ -65,6 +65,42 @@ protected: luci::CircleConst *_reshape_shape = nullptr; }; +// TODO Reduce duplicate code with ReshapeNegGraphlet +class ReshapeLogisticGraphlet +{ +public: + ReshapeLogisticGraphlet() = default; + +public: + void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out) + { + std::vector<uint32_t> shape_out_v = shape_out; + + _reshape_shape = g->nodes()->create<luci::CircleConst>(); 
+ _reshape = g->nodes()->create<luci::CircleReshape>(); + _logistic = g->nodes()->create<luci::CircleLogistic>(); + + _reshape_shape->dtype(loco::DataType::S32); + _reshape_shape->rank(1); + _reshape_shape->dim(0).set(shape_out_v.size()); + _reshape_shape->shape_status(luci::ShapeStatus::VALID); + // values + const auto size = shape_out_v.size(); + _reshape_shape->size<loco::DataType::S32>(size); + for (uint32_t i = 0; i < size; i++) + _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i]; + + _reshape_shape->name("reshape_shape"); + _reshape->name("reshape"); + _logistic->name("logistic"); + } + +protected: + luci::CircleReshape *_reshape = nullptr; + luci::CircleLogistic *_logistic = nullptr; + luci::CircleConst *_reshape_shape = nullptr; +}; + class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet { public: @@ -85,6 +121,26 @@ public: } }; +class ForwardReshapeToLogisticGraph : public TestIOGraph, public ReshapeLogisticGraphlet +{ +public: + ForwardReshapeToLogisticGraph() = default; + +public: + void init(const ShapeU32 shape_in, const ShapeU32 shape_out) + { + TestIOGraph::init(shape_in, shape_out); + ReshapeLogisticGraphlet::init(g(), shape_in, shape_out); + + // connect network + _reshape->tensor(input()); + _reshape->shape(_reshape_shape); + _logistic->x(_reshape); + + output()->from(_logistic); + } +}; + class ForwardReshapeToNegGraphTest : public ::testing::Test { public: @@ -101,6 +157,22 @@ protected: luci::ForwardReshapeToUnaryOpPass _pass; }; +class ForwardReshapeToLogisticGraphTest : public ::testing::Test +{ +public: + ForwardReshapeToLogisticGraphTest() = default; + + void run_pass(void) + { + while (_pass.run(_graph.g())) + ; + } + +protected: + ForwardReshapeToLogisticGraph _graph; + luci::ForwardReshapeToUnaryOpPass _pass; +}; + } // namespace TEST(ForwardReshapeToUnaryOpPassTest, name) @@ -123,3 +195,17 @@ TEST_F(ForwardReshapeToNegGraphTest, simple_forward) neg = dynamic_cast<luci::CircleNeg *>(reshape->tensor()); 
ASSERT_NE(nullptr, neg); } + +TEST_F(ForwardReshapeToLogisticGraphTest, forward) +{ + _graph.init({2, 2, 2}, {2, 4}); + + run_pass(); + + auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from()); + auto log = dynamic_cast<luci::CircleLogistic *>(_graph.output()->from()); + ASSERT_NE(nullptr, reshape); + ASSERT_EQ(nullptr, log); + log = dynamic_cast<luci::CircleLogistic *>(reshape->tensor()); + ASSERT_NE(nullptr, log); +} diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp index 97a962cb6..3cf31ed10 100644 --- a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp +++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp @@ -99,6 +99,12 @@ bool fuse_add_with_fc(luci::CircleFullyConnected *fc) fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i); } + // At this point, it is guaranteed that fused_bias's shape is [1, 1, ..., N] or [N] + // where N is weights->dim(0). 
+ // The shape is normalized to [N] to become the bias of FC + fused_bias->rank(1); + fused_bias->dim(0) = weights->dim(0); + fc->bias(fused_bias); fc->fusedActivationFunction(add->fusedActivationFunction()); diff --git a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp index 2bca57014..852bc8b63 100644 --- a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp +++ b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp @@ -37,10 +37,10 @@ namespace * \ | * [CircleTransposeConv] [CircleAdd] * | - * ([CircleRelu6]) + * ([CircleRelu/Relu6]) * | * - * Note: CircleRelu6 is inserted if Add activation is ReLU6 + * Note: CircleRelu/Relu6 is inserted if Add activation is ReLU/ReLU6 */ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv) { @@ -65,7 +65,8 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv) if (add->dtype() != loco::DataType::FLOAT32) return false; if (add->fusedActivationFunction() != luci::FusedActFunc::NONE && - add->fusedActivationFunction() != luci::FusedActFunc::RELU6) + add->fusedActivationFunction() != luci::FusedActFunc::RELU6 && + add->fusedActivationFunction() != luci::FusedActFunc::RELU) return false; // get addition @@ -102,6 +103,19 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv) // remove add node replace(add).with(relu); } + else if (add->fusedActivationFunction() == luci::FusedActFunc::RELU) + { + auto name = addition->name(); + assert(name.length() > 0); + // separate relu op from add op + auto relu = add->graph()->nodes()->create<luci::CircleRelu>(); + relu->features(tconv); + relu->name(name + "/Relu"); + luci::add_origin(relu, luci::get_origin(add)); + + // remove add node + replace(add).with(relu); + } else { replace(add).with(tconv); diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp index 337954960..e6b54df36 100644 --- a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp +++ 
b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp @@ -29,7 +29,7 @@ namespace * NOTE TF's BatchNormalization is converted to Mul and Add. * * BEFORE - * | [CircleOutputExclude] + * | [CircleConst]/[CircleOutputExclude] * | / [CircleConst] * | / / * [CircleTransposeConv] [CircleConst] @@ -40,7 +40,7 @@ namespace * | * * AFTER - * | [CircleOutputExclude] + * | [CircleConst]/[CircleOutputExclude] * +-------------------------------------+ / [CircleConst] * | | / / * | [CircleTransposeConv] [CircleConst] @@ -69,9 +69,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) return false; // check scale and shift constant attributes - if (scale->rank() != 1) + // TODO maybe rank check is not needed + if (scale->rank() != 1 && scale->rank() != 4) return false; - if (shift->rank() != 1) + if (shift->rank() != 1 && shift->rank() != 4) return false; // check mul, add attributes if (mul->dtype() != loco::DataType::FLOAT32) @@ -82,9 +83,8 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) add->fusedActivationFunction() != luci::FusedActFunc::RELU6) return false; - // tconv bias should be not set - if (not dynamic_cast<luci::CircleOutputExclude *>(tconv->bias())) - return false; + // tconv bias is optional + auto bias = dynamic_cast<luci::CircleConst *>(tconv->bias()); // get weight of tconv auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter()); @@ -96,10 +96,36 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) return false; auto filter_out_chn = filter->dim(0).value(); - if (filter_out_chn != scale->dim(0).value()) + // allow scale/shift and bias shape of [N], [1,1,1,N]; BN works for "channel-wise" + auto srank = scale->rank() - 1; + if (filter_out_chn != scale->dim(srank).value()) return false; - if (filter_out_chn != shift->dim(0).value()) + for (uint32_t d = 0; d < srank; ++d) + { + if (1 != scale->dim(d).value()) + return false; + } + srank = shift->rank() - 1; + if (filter_out_chn != shift->dim(srank).value()) return false; + for 
(uint32_t d = 0; d < srank; ++d) + { + if (1 != shift->dim(d).value()) + return false; + } + if (bias) + { + if (bias->dtype() != loco::DataType::FLOAT32) + return false; + srank = bias->rank() - 1; + if (filter_out_chn != bias->dim(srank).value()) + return false; + for (uint32_t d = 0; d < srank; ++d) + { + if (1 != bias->dim(d).value()) + return false; + } + } auto name = add->name(); assert(name.length() > 0); @@ -151,6 +177,11 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) for (uint32_t c = 0; c < filter_out_chn; ++c) { fused_bias->at<loco::DataType::FLOAT32>(c) = shift->at<loco::DataType::FLOAT32>(c); + if (bias != nullptr) + { + fused_bias->at<loco::DataType::FLOAT32>(c) += + bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c); + } } fused_bias->name(name + "/TransposeConv/bias"); @@ -166,6 +197,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add) luci::add_origin(fused_tconv, luci::composite_origin( {luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)})); + if (bias != nullptr) + { + luci::add_origin(fused_tconv, luci::get_origin(bias)); + } if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6) { diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp index f3ec6cd9e..10a651e35 100644 --- a/compiler/luci/pass/src/FuseInstanceNormPass.cpp +++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp @@ -325,6 +325,10 @@ public: } private: + bool condition_common_1_5(uint32_t ifm_channel_depth); + bool condition_common_3_4(); + +private: template <enum PatternVersion> bool match(); public: @@ -368,21 +372,8 @@ private: if (not(condition)) \ return false; -template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>() +bool InstanceNormPattern::condition_common_1_5(uint32_t ifm_channel_depth) { - CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal)); - 
CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm)); - - auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm); - CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID); - CHECK_OR_FALSE(ifm_circle->rank() == 4); - CHECK_OR_FALSE(ifm_circle->dim(3).known()); - uint32_t ifm_channel_depth = ifm_circle->dim(3).value(); - - CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma)); - - CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth)); - add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x()); CHECK_OR_FALSE(add_as_variance); @@ -408,6 +399,70 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion: CHECK_OR_FALSE(const_as_beta); CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth)); + return true; +} + +bool InstanceNormPattern::condition_common_3_4() +{ + // check left sub + ifm = sub->x(); + CHECK_OR_FALSE(ifm); + + luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm); + CHECK_OR_FALSE(ifm_node->rank() == 4); + CHECK_OR_FALSE(ifm_node->dim(3).known()); + + mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y()); + CHECK_OR_FALSE(mean_of_ifm); + CHECK_OR_FALSE(ifm == mean_of_ifm->input()); + + // continue search from add_as_variance + CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance)); + CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); + // TODO Support regarding broadcast + CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1); + + mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x()); + CHECK_OR_FALSE(mean_as_variance); + + square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input()); + CHECK_OR_FALSE(square); + + sub_2 = dynamic_cast<luci::CircleSub *>(square->x()); + CHECK_OR_FALSE(sub_2); + CHECK_OR_FALSE(ifm == sub_2->x()); + + mean_of_ifm_2 = dynamic_cast<luci::CircleMean 
*>(sub_2->y()); + CHECK_OR_FALSE(mean_of_ifm_2); + CHECK_OR_FALSE(ifm == mean_of_ifm_2->input()); + + loco::Node *ifm_should_be = nullptr; + luci::CircleMean *mean_of_ifm_2_should_be = nullptr; + CHECK_OR_FALSE( + luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2)); + CHECK_OR_FALSE(ifm == ifm_should_be); + CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be); + + return true; +} + +template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>() +{ + CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal)); + CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm)); + + auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm); + CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID); + CHECK_OR_FALSE(ifm_circle->rank() == 4); + CHECK_OR_FALSE(ifm_circle->dim(3).known()); + uint32_t ifm_channel_depth = ifm_circle->dim(3).value(); + + CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma)); + + CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth)); + + CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth)); + luci::CircleMul *mul_gamma_should_be = nullptr; luci::CircleMean *mean_of_ifm_should_be = nullptr; @@ -488,44 +543,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion: CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma)); CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div)); - // check left sub - ifm = sub->x(); - CHECK_OR_FALSE(ifm); - - luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm); - CHECK_OR_FALSE(ifm_node->rank() == 4); - CHECK_OR_FALSE(ifm_node->dim(3).known()); - - mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y()); - CHECK_OR_FALSE(mean_of_ifm); - CHECK_OR_FALSE(ifm == mean_of_ifm->input()); - - // continue search from 
add_as_variance - CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance)); - CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); - // TODO Support regarding broadcast - CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1); - - mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x()); - CHECK_OR_FALSE(mean_as_variance); - - square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input()); - CHECK_OR_FALSE(square); - - sub_2 = dynamic_cast<luci::CircleSub *>(square->x()); - CHECK_OR_FALSE(sub_2); - CHECK_OR_FALSE(ifm == sub_2->x()); - - mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y()); - CHECK_OR_FALSE(mean_of_ifm_2); - CHECK_OR_FALSE(ifm == mean_of_ifm_2->input()); - - loco::Node *ifm_should_be = nullptr; - luci::CircleMean *mean_of_ifm_2_should_be = nullptr; - CHECK_OR_FALSE( - luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2)); - CHECK_OR_FALSE(ifm == ifm_should_be); - CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be); + CHECK_OR_FALSE(condition_common_3_4()); _matched = true; return true; @@ -546,44 +564,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion: CHECK_OR_FALSE(div); CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div)); - // check left sub - ifm = sub->x(); - CHECK_OR_FALSE(ifm); - - luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm); - CHECK_OR_FALSE(ifm_node->rank() == 4); - CHECK_OR_FALSE(ifm_node->dim(3).known()); - - mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y()); - CHECK_OR_FALSE(mean_of_ifm); - CHECK_OR_FALSE(ifm == mean_of_ifm->input()); - - // continue search from add_as_variance - CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance)); - CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); - // TODO Support regarding broadcast - 
CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1); - - mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x()); - CHECK_OR_FALSE(mean_as_variance); - - square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input()); - CHECK_OR_FALSE(square); - - sub_2 = dynamic_cast<luci::CircleSub *>(square->x()); - CHECK_OR_FALSE(sub_2); - CHECK_OR_FALSE(ifm == sub_2->x()); - - mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y()); - CHECK_OR_FALSE(mean_of_ifm_2); - CHECK_OR_FALSE(ifm == mean_of_ifm_2->input()); - - loco::Node *ifm_should_be = nullptr; - luci::CircleMean *mean_of_ifm_2_should_be = nullptr; - CHECK_OR_FALSE( - luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2)); - CHECK_OR_FALSE(ifm == ifm_should_be); - CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be); + CHECK_OR_FALSE(condition_common_3_4()); assert(const_as_gamma == nullptr); assert(const_as_beta == nullptr); @@ -612,30 +593,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion: CHECK_OR_FALSE(ifm_circle->dim(3).known()); uint32_t ifm_channel_depth = ifm_circle->dim(3).value(); - add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x()); - CHECK_OR_FALSE(add_as_variance); - - CHECK_OR_FALSE( - luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance)); - - CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32); - // TODO Support regarding broadcast - CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1); - - CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance)); - - sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input()); - CHECK_OR_FALSE(sqdiff); - - loco::Node *ifm_should_be = nullptr; - CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff)); - CHECK_OR_FALSE(ifm == ifm_should_be); - CHECK_OR_FALSE(is_instance_mean_v1(mean_of_ifm)); - CHECK_OR_FALSE(ifm == 
mean_of_ifm->input()); - - const_as_beta = dynamic_cast<luci::CircleConst *>(sub->x()); - CHECK_OR_FALSE(const_as_beta); - CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth)); + CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth)); luci::CircleRsqrt *rsqrt_should_be = nullptr; luci::CircleMean *mean_of_ifm_should_be = nullptr; diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp index b4975486d..e8fa2a478 100644 --- a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp +++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp @@ -23,6 +23,7 @@ #include <luci/Log.h> #include <cmath> +#include <limits> namespace { diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp index 003e4c293..aaadb2864 100644 --- a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp +++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp @@ -138,13 +138,18 @@ struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool auto qtype = luci::activation_qtype(input_node); switch (qtype) { - case luci::ActivationQType::PreDefinedValue: - node->quantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype())); + case luci::ActivationQType::PreDefinedLogistic: + case luci::ActivationQType::PreDefinedTanh: + case luci::ActivationQType::PreDefinedSoftmax: + node->quantparam(luci::make_predefined_qparam(qtype, node->dtype())); break; case luci::ActivationQType::IntScale: luci::set_int_scale(node); break; default: + // This assert ensures this switch-satement handles all ActivationQTypes + // TODO Find a better design to remove coupling with ActivationQType + assert(qtype == luci::ActivationQType::MinMax); break; } diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp index ad86cedf4..06a4ae9f6 100644 --- a/compiler/luci/pass/src/QuantizationUtils.cpp 
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp @@ -20,6 +20,7 @@ #include <iostream> #include <cmath> +#include <limits> namespace luci { @@ -276,31 +277,70 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices) indices[2] * dimension.dim(3).value() + indices[3]; } +// Activation (ofm) qtype is determined in different ways. +// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH) +// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL) +// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE, +// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details). ActivationQType activation_qtype(const CircleNode *node) { auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node); if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) - return ActivationQType::PreDefinedValue; + return ActivationQType::PreDefinedTanh; + +#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT) \ + { \ + auto n = loco::must_cast<const CLASS *>(node); \ + auto input = loco::must_cast<CircleNode *>(n->INPUT()); \ + return activation_qtype(input); \ + } switch (node->opcode()) { case CircleOpcode::LOGISTIC: + return ActivationQType::PreDefinedLogistic; case CircleOpcode::TANH: + return ActivationQType::PreDefinedTanh; case CircleOpcode::SOFTMAX: - return ActivationQType::PreDefinedValue; + return ActivationQType::PreDefinedSoftmax; case CircleOpcode::FLOOR: case CircleOpcode::FLOOR_DIV: case CircleOpcode::FLOOR_MOD: case CircleOpcode::CEIL: return ActivationQType::IntScale; + case CircleOpcode::GATHER: + RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params); + case CircleOpcode::RESHAPE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor); + case CircleOpcode::TRANSPOSE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a); + case CircleOpcode::STRIDED_SLICE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input); 
+ case CircleOpcode::SPLIT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input); + case CircleOpcode::CIRCLESPLITOUT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input); + case CircleOpcode::SPLIT_V: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input); + case CircleOpcode::CIRCLESPLITVOUT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input); + case CircleOpcode::UNPACK: + RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value); + case CircleOpcode::CIRCLEUNPACKOUT: + RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input); + case CircleOpcode::QUANTIZE: + RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input); default: break; } +#undef RETURN_INPUT_ACTIVATION_QTYPE + return ActivationQType::MinMax; } -std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype) +std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype, + loco::DataType dtype) { auto qparam = std::make_unique<CircleQuantParam>(); @@ -309,9 +349,9 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo qparam->zerop.emplace_back(zp); }; - switch (opcode) + switch (qtype) { - case CircleOpcode::LOGISTIC: + case ActivationQType::PreDefinedLogistic: if (dtype == loco::DataType::U8) set_qparam(1.0f / 256.0f, 0); else @@ -320,7 +360,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo set_qparam(1.0f / 32768.0f, 0); } break; - case CircleOpcode::TANH: + case ActivationQType::PreDefinedTanh: if (dtype == loco::DataType::U8) set_qparam(2.0f / 256.0f, 128); else @@ -329,7 +369,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo set_qparam(1.0f / 32768.0f, 0); } break; - case CircleOpcode::SOFTMAX: + case ActivationQType::PreDefinedSoftmax: if (dtype == loco::DataType::U8) set_qparam(1.0f / 255.0f, 0); else @@ -341,7 +381,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo default: throw std::runtime_error("Unsupported 
opcode with pre-defined qparam"); } - return std::move(qparam); + return qparam; } // For nodes with integer output, we use integer scale @@ -395,4 +435,74 @@ void quant_const(luci::CircleConst *node, loco::DataType quant_type) node->quantparam(std::move(quantparam)); } +namespace +{ + +// TODO move this to a more global helper file +int nbits(loco::DataType dt) noexcept +{ + switch (dt) + { + case loco::DataType::S8: + case loco::DataType::U8: + return 8; + case loco::DataType::S16: + case loco::DataType::U16: + case loco::DataType::FLOAT16: + return 16; + case loco::DataType::S32: + case loco::DataType::U32: + case loco::DataType::FLOAT32: + return 32; + case loco::DataType::S64: + return 64; + default: + return 64; // a safe large default + } +} + +// TODO Check if the metric is valid +// Returns true if [min,max] is poorly representable +bool range_check(float min, float max, loco::DataType dtype) +{ + float thresh = 1.5f; + return log2f(max) - log2f(min) > nbits(dtype) * thresh; +} + +bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n) +{ + float min, max; + // estimate min/max + switch (n->dtype()) + { + case loco::DataType::U8: + min = scale * (0 - zp); + max = scale * (255 - zp); + break; + case loco::DataType::S16: + min = scale * (-32767); + max = scale * (32767); + break; + default: + return false; + } + return range_check(min, max, n->dtype()); +} + +} // namespace + +void warn_accuracy_with_range(luci::CircleNode *n) +{ + LOGGER(l); + auto qp = n->quantparam(); + auto k = qp->zerop.size(); + for (uint32_t i = 0; i < k; i++) + { + if (warn_scale_zp(qp->scale[i], qp->zerop[i], n)) + WARN(l) << "Quantization of " << i << "-th channel of " << n->name() + << "'s quantization may cause accuracy issues" << std::endl; + ; + } +} + } // namespace luci diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h index cd8cec95a..4d5316ccb 100644 --- a/compiler/luci/pass/src/QuantizationUtils.h +++ 
b/compiler/luci/pass/src/QuantizationUtils.h @@ -62,15 +62,19 @@ bool is_quantized(const CircleNode *node); enum ActivationQType { - MinMax, // Quantize using recorded min/max - PreDefinedValue, // Quantize using pre-defined values - IntScale, // Round scale to a positive integer + MinMax, // Quantize using recorded min/max + PreDefinedLogistic, // Quantize using pre-defined values + PreDefinedTanh, // Quantize using pre-defined values + PreDefinedSoftmax, // Quantize using pre-defined values + IntScale, // Round scale to a positive integer }; ActivationQType activation_qtype(const CircleNode *node); // Create qparam with pre-defined values for speical operators -std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype); +std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleNode *node, loco::DataType dtype); +std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype, + loco::DataType dtype); // Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil) void set_int_scale(luci::CircleNode *node); @@ -78,6 +82,10 @@ void set_int_scale(luci::CircleNode *node); // Quantize const tensor using its min/max values void quant_const(luci::CircleConst *node, loco::DataType quant_type); +// Check that a node is quantized without significant loss of precision; +// Emits warnings to log with WARN +void warn_accuracy_with_range(luci::CircleNode *n); + } // namespace luci #endif // __LUCI_QUANTIZATION_UTILS_H__ diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp index 149331824..95251a82c 100644 --- a/compiler/luci/pass/src/QuantizeActivation.cpp +++ b/compiler/luci/pass/src/QuantizeActivation.cpp @@ -114,29 +114,26 @@ void QuantizeSpecialActivation::visit(luci::CircleNode *node) auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node); if (fused_act_node != nullptr && 
fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) { - auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type); node->quantparam(std::move(qparam)); } } void QuantizeSpecialActivation::visit(luci::CircleLogistic *node) { - assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); - auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedLogistic, output_type); node->quantparam(std::move(qparam)); } void QuantizeSpecialActivation::visit(luci::CircleTanh *node) { - assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); - auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type); node->quantparam(std::move(qparam)); } void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node) { - assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue); - auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type); + auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedSoftmax, output_type); node->quantparam(std::move(qparam)); } diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp index aa496232a..de97a14dd 100644 --- a/compiler/luci/pass/src/QuantizeBias.cpp +++ b/compiler/luci/pass/src/QuantizeBias.cpp @@ -22,6 +22,7 @@ #include <algorithm> #include <cmath> +#include <limits> using namespace luci; @@ -201,6 +202,18 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w std::vector<float> scaling_factor(size); std::vector<int64_t> zp(size); + if (const_bias->rank() == 0) + { + // TODO Support quantization of scalar bias + throw std::runtime_error("Quantization of scalar bias is not yet supported 
(" + + const_bias->name() + ")"); + } + if (size != const_bias->dim(const_bias->rank() - 1).value()) + { + throw std::runtime_error(const_bias->name() + + " (bias) should have the shape of [1, 1, .. 1, channel]"); + } + if (output_type == loco::DataType::U8) { new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp); @@ -218,6 +231,7 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w auto quantparam = std::make_unique<CircleQuantParam>(); quantparam->scale = scaling_factor; quantparam->zerop = zp; + quantparam->quantized_dimension = const_bias->rank() - 1; assert(new_bias->quantparam() == nullptr); // bias should not be quantized before new_bias->quantparam(std::move(quantparam)); diff --git a/compiler/luci/pass/src/QuantizeBias.test.cpp b/compiler/luci/pass/src/QuantizeBias.test.cpp new file mode 100644 index 000000000..0104a191b --- /dev/null +++ b/compiler/luci/pass/src/QuantizeBias.test.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "QuantizeBias.h" + +#include <luci/test/TestIOGraph.h> +#include <luci/IR/CircleNodes.h> +#include <luci/IR/CircleQuantParam.h> + +#include <gtest/gtest.h> + +using namespace luci; + +namespace +{ + +using namespace luci::test; + +// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template <typename T> +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector<uint32_t> &shape, T value) +{ + auto node = g->nodes()->create<luci::CircleConst>(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size<DT>(size); \ + for (uint32_t i = 0; i < size; ++i) \ + node->at<DT>(i) = value; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} + +/** + * Simple graph for test + * + * BEFORE + * + * [IFM] [WEIGHTS] [BIAS(FP32)] + * \ | / + * [FC] + * | + * [OFM] + * + * AFTER + * + * [IFM] [WEIGHTS] [BIAS(Quantized)] + * \ | / + * [FC] + * | + * [OFM] + */ +struct Q8FCGraphlet +{ +public: + Q8FCGraphlet() = default; + virtual ~Q8FCGraphlet() = default; + + void init(loco::Graph *g, const ShapeU32 out_shape, const ShapeU32 w_shape, + const ShapeU32 bias_shape, const float bv) + { + _fc = g->nodes()->create<luci::CircleFullyConnected>(); + _fc->input(_x); + _x->dtype(loco::DataType::U8); + { + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->scale.push_back(1.0); + quantparam->zerop.push_back(0); + 
quantparam->quantized_dimension = 0; + _x->quantparam(std::move(quantparam)); + } + + auto weights = create_const_node<uint8_t>(g, loco::DataType::U8, w_shape, 1.0); + auto w_qparam = std::make_unique<CircleQuantParam>(); + std::vector<float> w_scale(weights->dim(0).value(), 1.0); + std::vector<int64_t> w_zp(weights->dim(0).value(), 0); + w_qparam->scale = w_scale; + w_qparam->zerop = w_zp; + w_qparam->quantized_dimension = 0; + weights->quantparam(std::move(w_qparam)); + _fc->weights(weights); + _fc->fusedActivationFunction(luci::FusedActFunc::NONE); + _fc->dtype(loco::DataType::U8); + _fc->shape(out_shape); + auto l = _fc->dim(_fc->rank() - 1).value(); + _fc->bias(create_const_node(g, loco::DataType::FLOAT32, bias_shape, bv)); + _fc->name("fc"); + { + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->scale.push_back(1.0); + quantparam->zerop.push_back(0); + quantparam->quantized_dimension = 0; + _fc->quantparam(std::move(quantparam)); + } + } + +public: + luci::CircleFullyConnected *fc() { return _fc; } + +protected: + luci::CircleFullyConnected *_fc = nullptr; + luci::CircleInput *_x = nullptr; +}; + +struct Q8FCGraph final : public TestIGraphlet, public TestOGraphlet, public Q8FCGraphlet +{ + void init(const ShapeU32 in_shape, const ShapeU32 w_shape, const ShapeU32 out_shape, + const ShapeU32 bias_shape, const float bv) + { + TestIGraphlet::init(g(), in_shape); + TestOGraphlet::init(g(), out_shape); + _x = input(); + Q8FCGraphlet::init(g(), out_shape, w_shape, bias_shape, bv); + output()->from(_fc); + } +}; + +class CQ8QuantizeBiasFCTest : public ::testing::Test +{ +public: + Q8FCGraph g; + luci::QuantizeBias qb{loco::DataType::FLOAT32, loco::DataType::U8, + luci::QuantizationGranularity::ChannelWise}; +}; + +} // namespace + +TEST_F(CQ8QuantizeBiasFCTest, fully_connected) +{ + g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 256}, 1); + g.fc()->accept(&qb); + + auto bias = loco::must_cast<CircleConst *>(g.fc()->bias()); + auto qparam = 
bias->quantparam(); + + EXPECT_NE(nullptr, qparam); + EXPECT_EQ(256, qparam->scale.size()); + EXPECT_EQ(256, qparam->zerop.size()); + EXPECT_EQ(1, qparam->quantized_dimension); +} + +TEST_F(CQ8QuantizeBiasFCTest, wrong_bias_shape_NEG) +{ + g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 2, 128}, 1); + EXPECT_ANY_THROW(g.fc()->accept(&qb)); // Wrong bias shape +} diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp index c9b35e0be..ef047d35d 100644 --- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp +++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp @@ -27,6 +27,7 @@ #include <iostream> #include <cmath> #include <functional> +#include <limits> namespace { @@ -352,15 +353,15 @@ private: private: // Check if // 1. node is const - // 2. node was not quantized + // 2. node's dtype is float32 bool is_quantizable(loco::Node *node) { auto const_node = dynamic_cast<luci::CircleConst *>(node); if (not const_node) return false; - // Skip if this is already quantized - if (is_quantized(const_node)) + // Skip if this is not float32 + if (const_node->dtype() != loco::DataType::FLOAT32) return false; return true; diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp index 11322ab44..500ae12ed 100644 --- a/compiler/luci/pass/src/QuantizeWeights.cpp +++ b/compiler/luci/pass/src/QuantizeWeights.cpp @@ -23,6 +23,7 @@ #include <cmath> #include <vector> #include <functional> +#include <limits> using namespace luci; diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index d9a9d4db7..005144516 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -41,10 +41,28 @@ namespace { using namespace luci; + +bool use_predefined_values(ActivationQType qtype) +{ + switch (qtype) + { + case ActivationQType::PreDefinedLogistic: + 
case ActivationQType::PreDefinedTanh: + case ActivationQType::PreDefinedSoftmax: + return true; + default: + // This ensures this switch-statement handles all ActivationQTypes + assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax); + break; + } + + return false; +} + // Create a Quantize Op whose // dtype is out_type // shape is the same with node -// qparam is computed using node's min/max +// qparam is computed according to node's qtype luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type) { auto quantize = node->graph()->nodes()->create<CircleQuantize>(); @@ -60,9 +78,9 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType assert(qparam); // FIX_CALLER_UNLESS auto qtype = luci::activation_qtype(node); - if (qtype == ActivationQType::PreDefinedValue) + if (use_predefined_values(qtype)) { - quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type)); + quantize->quantparam(luci::make_predefined_qparam(qtype, out_type)); return quantize; } @@ -105,6 +123,23 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType return quantize; } +// Create Dequantize Op whose shape is the same with node +luci::CircleDequantize *create_dequantize(luci::CircleNode *node) +{ + auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>(); + dequantize->name(node->name() + "_Dequantize"); + dequantize->dtype(loco::DataType::FLOAT32); + dequantize->rank(node->rank()); + for (uint32_t i = 0; i < node->rank(); i++) + dequantize->dim(i).set(node->dim(i).value()); + + dequantize->shape_status(luci::ShapeStatus::VALID); + + luci::add_origin(dequantize, luci::get_origin(node)); + + return dequantize; +} + } // namespace namespace luci @@ -229,11 +264,13 @@ private: INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input) + 
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x) INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input) @@ -241,6 +278,7 @@ private: INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input) @@ -250,6 +288,7 @@ private: INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input) + INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input) INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input) @@ -353,7 +392,9 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const luci::add_origin(quant_op, luci::get_origin(succ)); } - // Requantize input + // Update qparam of input + // This step is skipped if input_type is float32 + if (_ctx->input_type != loco::DataType::FLOAT32) { auto quantparam = input->quantparam(); assert(quantparam); @@ -376,11 +417,13 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const assert(_ctx->input_type == loco::DataType::S16); compute_sym_scale_zp(min, max, scaling_factor, zp, 
nudged_min, nudged_max); } - input->dtype(_ctx->input_type); input->quantparam()->scale[0] = scaling_factor; input->quantparam()->zerop[0] = zp; } + // Update dtype of input + input->dtype(_ctx->input_type); + auto graph_input = inputs->at(input->index()); graph_input->dtype(_ctx->input_type); } @@ -405,13 +448,26 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const if (not from->quantparam()) continue; - // Insert Quantize Op - auto quant_op = create_quantize_op(from, _ctx->output_type); - loco::replace(from).with(quant_op); - quant_op->input(from); + // Insert Dequantize Op for float32 output_type + if (_ctx->output_type == loco::DataType::FLOAT32) + { + auto dequant_op = create_dequantize(from); + loco::replace(from).with(dequant_op); + dequant_op->input(from); + } + else + { + // Insert Quantize Op for non-float32 output_type + auto quant_op = create_quantize_op(from, _ctx->output_type); + loco::replace(from).with(quant_op); + quant_op->input(from); - // TODO Set a proper origin (Quantize should have its own Origin) - luci::add_origin(quant_op, luci::get_origin(from)); + // TODO Set a proper origin (Quantize should have its own Origin) + luci::add_origin(quant_op, luci::get_origin(from)); + } + + // Update dtype of output + output->dtype(_ctx->output_type); auto graph_output = outputs->at(output->index()); graph_output->dtype(_ctx->output_type); @@ -594,12 +650,25 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) // Set output type set_output_type(g); + // Remove redundant Quantize Op + { + logo::Phase phase; + + phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>()); + + ProgressReporter prog(g, logo::PhaseStrategy::Saturate); + logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g}; + phase_runner.attach(&prog); + phase_runner.run(phase); + } + // Remove min/max values for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast<luci::CircleNode *>(node); if (auto qparam = 
circle_node->quantparam()) { + warn_accuracy_with_range(circle_node); qparam->min.clear(); qparam->max.clear(); } diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp index cebafd32b..21b4fe1c6 100644 --- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp +++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp @@ -1088,6 +1088,31 @@ private: luci::CircleConst *_const = nullptr; }; +class ReduceMaxTestGraph final : public SimpleTestGraph +{ +public: + void init(void) override + { + TestIOGraph::init({4, 3, 2}, {2}); + + _axis = create_const<Type::S32, int32_t>(g(), {4}, {1, 0, -3, -3}); + _reduce_max = g()->nodes()->create<luci::CircleReduceMax>(); + { + _reduce_max->input(input()); + _reduce_max->reduction_indices(_axis); + _reduce_max->name("test"); + _reduce_max->keep_dims(false); + } + output()->from(_reduce_max); + + set_minmax_to_non_const(g(), -1, 1); + } + +private: + luci::CircleReduceMax *_reduce_max = nullptr; + luci::CircleConst *_axis = nullptr; +}; + class ResizeBilinearTestGraph final : public SimpleTestGraph { public: @@ -2345,6 +2370,34 @@ TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG) SUCCEED(); } +TEST(QuantizedModelVerifierTest, ReduceMax) +{ + TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise); + + TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, ReduceMax_wrong_type_NEG) +{ + TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise, 
Type::S16); + TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, ReduceMax_wrong_granularity_NEG) +{ + TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + TEST(QuantizedModelVerifierTest, ResizeBilinear) { TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise); diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp new file mode 100644 index 000000000..66cd9d791 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/RemoveRedundantDequantizePass.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ + +bool remove_redundant_dequant(luci::CircleDequantize *dequant) +{ + assert(dequant != nullptr); + + auto prev = loco::must_cast<luci::CircleNode *>(dequant->input()); + if (prev->dtype() != loco::DataType::FLOAT32) + return false; + + replace(dequant).with(prev); + + return true; +} + +} // namespace + +namespace luci +{ +/** + * Dequantize Op does the below things on the ifm. + * 1. Element-wise update of quantized values (u8/s16) to fp32 values + * 2. Update dtype to fp32 + * If the previous node is not quantized, dequantize Op is redundant. + * + * BEFORE + * + * [CircleNode (A)] + * | + * [CircleNode (B)] (fp32) + * | + * [CircleDequantize] + * | + * [CircleNode] + * + * AFTER + * + * [CircleNode (A)] + * | + * [CircleNode (B)] (fp32) + * | + * [CircleNode] + */ +bool RemoveRedundantDequantizePass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto target_node = dynamic_cast<luci::CircleDequantize *>(node); + if (target_node != nullptr) + { + if (remove_redundant_dequant(target_node)) + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp new file mode 100644 index 000000000..adb2f14a4 --- /dev/null +++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/RemoveRedundantDequantizePass.h" + +#include <luci/IR/CircleNodes.h> + +#include <luci/test/TestIOGraph.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +class DequantizeGraphlet +{ +public: + DequantizeGraphlet() = default; + +public: + void init(loco::Graph *g) + { + _dequantize = g->nodes()->create<luci::CircleDequantize>(); + _dequantize->dtype(loco::DataType::FLOAT32); + _dequantize->name("dequantize"); + } + +protected: + luci::CircleDequantize *_dequantize = nullptr; +}; + +class RedundantDequantizeGraph : public TestIOGraph, public DequantizeGraphlet +{ +public: + RedundantDequantizeGraph() = default; + +public: + void init(void) + { + TestIOGraph::init({1}, {1}); + DequantizeGraphlet::init(g()); + + _dequantize->input(input()); + + output()->from(_dequantize); + } + + void init_u8_input(void) + { + TestIOGraph::init({1}, {1}); + DequantizeGraphlet::init(g()); + + // Use u8 input (dequantize is not redundant anymore) + input()->dtype(loco::DataType::U8); + { + auto qparam = std::make_unique<luci::CircleQuantParam>(); + qparam->scale = {1}; + qparam->zerop = {1}; + input()->quantparam(std::move(qparam)); + } + + _dequantize->input(input()); + + output()->from(_dequantize); + } +}; + +} // namespace + +TEST(RemoveRedundantDequantizePass, single_redundant_dequantize) +{ + RedundantDequantizeGraph g; + luci::RemoveRedundantDequantizePass pass; + + g.init(); + + EXPECT_TRUE(pass.run(g.g())); + + int count = 0; + for (auto node : loco::active_nodes(loco::output_nodes(g.g()))) + { + if 
(dynamic_cast<luci::CircleDequantize *>(node)) + { + count++; + } + } + + ASSERT_EQ(0, count); +} + +TEST(RemoveRedundantDequantizePass, wrong_dtype_NEG) +{ + RedundantDequantizeGraph g; + luci::RemoveRedundantDequantizePass pass; + + g.init_u8_input(); + + EXPECT_FALSE(pass.run(g.g())); +} diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp new file mode 100644 index 000000000..476ec68bf --- /dev/null +++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h" + +#include <luci/IR/CircleNodes.h> + +namespace +{ + +bool acceptable_intermediate_op(const loco::Node *node) +{ + if (not node) + return false; + + const auto opcode = loco::must_cast<const luci::CircleNode *>(node)->opcode(); + + switch (opcode) + { + case luci::CircleOpcode::ADD: + case luci::CircleOpcode::MUL: + case luci::CircleOpcode::TANH: + case luci::CircleOpcode::LOGISTIC: + break; + + default: + return false; + } + + return true; +} + +bool same_shape(const loco::Node *a, const loco::Node *b) +{ + auto a_cnode = loco::must_cast<const luci::CircleNode *>(a); + auto b_cnode = loco::must_cast<const luci::CircleNode *>(b); + + if (a_cnode->rank() != b_cnode->rank()) + return false; + + for (uint32_t i = 0; i < a_cnode->rank(); i++) + { + if (not(a_cnode->dim(i) == b_cnode->dim(i))) + return false; + } + return true; +} + +class PreReshapeFinder +{ +public: + PreReshapeFinder(const luci::CircleReshape *post_reshape) : _post_reshape(post_reshape) + { + assert(post_reshape != nullptr); // FIX_CALLER_UNLESS + } + +public: + // Return true if pre_reshapes are found + bool collect_pre_reshapes(loco::Node *node) + { + // TODO Support diamond case + if (loco::succs(node).size() != 1) + return false; + + if (auto pre_reshape = dynamic_cast<luci::CircleReshape *>(node)) + { + // Check ifm of pre-reshape and ofm of post_reshape + if (not same_shape(pre_reshape->tensor(), _post_reshape)) + return false; + + // Check ofm of pre-reshape and ifm of post_reshape + if (not same_shape(pre_reshape, _post_reshape->tensor())) + return false; + + _pre_reshapes.emplace_back(pre_reshape); + return true; + } + + if (not acceptable_intermediate_op(node)) + return false; + + for (uint32_t i = 0; i < node->arity(); i++) + { + if (not collect_pre_reshapes(node->arg(i))) + return false; + } + + return true; + } + +public: + std::vector<luci::CircleReshape *> pre_reshapes(void) const { return _pre_reshapes; } + +private: + 
const luci::CircleReshape *_post_reshape = nullptr; + std::vector<luci::CircleReshape *> _pre_reshapes; +}; + +bool remove_unnecessary_reshape_net(luci::CircleReshape *reshape) +{ + PreReshapeFinder finder(reshape); + if (not finder.collect_pre_reshapes(reshape->tensor())) + return false; + + // Remove pre_reshapes + for (auto pre_reshape : finder.pre_reshapes()) + { + loco::replace(pre_reshape).with(pre_reshape->tensor()); + } + + // Remove post_reshape + loco::replace(reshape).with(reshape->tensor()); + + return true; +} + +} // namespace + +namespace luci +{ + +/** + * BEFORE + * + * [CircleNode] + * | + * [CircleReshape_1] (shape: A -> B) + * | + * [CircleNode] (ex: Add/Mul/Tanh/Logistic ..) + * | + * [CircleReshape_2] (shape: B -> A) + * | + * [CircleNode] + * + * AFTER + * + * [CircleNode] + * | \ + * | [CircleReshape_1] + * [CircleNode] + * | \ + * | [CircleReshape_2] + * [CircleNode] + **/ +bool RemoveUnnecessaryReshapeNetPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node)) + { + if (remove_unnecessary_reshape_net(reshape_node)) + changed = true; + } + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp new file mode 100644 index 000000000..4ad707ba3 --- /dev/null +++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +class RemoveUnnecessaryReshapeNet : public ::testing::Test +{ +public: + RemoveUnnecessaryReshapeNet() {} + + void createReshapeConst(luci::CircleReshape *target, const std::vector<uint32_t> shape) + { + auto shape_const = g.nodes()->create<luci::CircleConst>(); + shape_const->dtype(loco::DataType::S32); + shape_const->size<loco::DataType::S32>(shape.size()); + shape_const->shape_status(luci::ShapeStatus::VALID); + shape_const->rank(1); + shape_const->dim(0).set(shape.size()); + for (int32_t i = 0; i < shape.size(); i++) + { + shape_const->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape.at(i)); + } + shape_const->name("shape_const"); + target->shape(shape_const); + target->rank(shape.size()); + for (uint32_t i = 0; i < shape.size(); i++) + { + target->dim(i) = shape[i]; + } + target->shape_status(luci::ShapeStatus::VALID); + } + + void buildGraph(const std::initializer_list<uint32_t> base_shape, + const std::initializer_list<uint32_t> first_shape, + const std::initializer_list<uint32_t> second_shape) + { + // Input Create. + input = g.nodes()->create<luci::CircleInput>(); + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + input->shape_status(luci::ShapeStatus::VALID); + input->shape(base_shape); + input->name("input"); + + // Create first reshape. 
+ first_reshape = g.nodes()->create<luci::CircleReshape>(); + first_reshape->tensor(input); + first_reshape->name("Reshape"); + createReshapeConst(first_reshape, first_shape); + + // Create logistic. + logistic = g.nodes()->create<luci::CircleLogistic>(); + logistic->x(first_reshape); + logistic->name("logistic"); + logistic->shape(first_shape); + logistic->shape_status(luci::ShapeStatus::VALID); + + // Create second reshape. + second_reshape = g.nodes()->create<luci::CircleReshape>(); + second_reshape->tensor(logistic); + second_reshape->name("second_reshape"); + createReshapeConst(second_reshape, second_shape); + + // Output Connect. + output = g.nodes()->create<luci::CircleOutput>(); + output->from(second_reshape); + output->name("output"); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + } + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleReshape *first_reshape = nullptr; + luci::CircleLogistic *logistic = nullptr; + luci::CircleReshape *second_reshape = nullptr; + luci::CircleOutput *output = nullptr; +}; + +} // namespace + +TEST_F(RemoveUnnecessaryReshapeNet, simple_case) +{ + buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 1, 32}); + luci::RemoveUnnecessaryReshapeNetPass pass; + + ASSERT_TRUE(pass.run(&g)); + + int count = 0; + for (auto node : loco::active_nodes(loco::output_nodes(&g))) + { + if (auto reshape = dynamic_cast<luci::CircleReshape *>(node)) + count++; + } + ASSERT_EQ(0, count); +} + +TEST_F(RemoveUnnecessaryReshapeNet, shape_mismatch_NEG) +{ + buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 2, 16}); + luci::RemoveUnnecessaryReshapeNetPass pass; + ASSERT_FALSE(pass.run(&g)); +} diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp new file mode 100644 index 000000000..741b70956 --- /dev/null +++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp @@ -0,0 +1,196 @@ +/* + * 
Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <luci/IR/CircleNodes.h> +#include <luci/Profile/CircleNodeOrigin.h> +#include <luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h> + +namespace +{ + +// TODO move to global helper list if needed +/** + * @brief Create a node with `inp` as input from fused activation function `act` + */ +luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act) +{ + switch (act) + { + case luci::FusedActFunc::NONE: + return inp; + case luci::FusedActFunc::RELU: + { + auto n = inp->graph()->nodes()->create<luci::CircleRelu>(); + n->features(inp); + return n; + } + case luci::FusedActFunc::RELU6: + { + auto n = inp->graph()->nodes()->create<luci::CircleRelu6>(); + n->features(inp); + return n; + } + case luci::FusedActFunc::RELU_N1_TO_1: + { + auto n = inp->graph()->nodes()->create<luci::CircleReluN1To1>(); + n->features(inp); + return n; + } + case luci::FusedActFunc::TANH: + { + auto n = inp->graph()->nodes()->create<luci::CircleTanh>(); + n->x(inp); + return n; + } + case luci::FusedActFunc::SIGN_BIT: + { + throw std::invalid_argument("no matching node to create from fused activation"); + } + default: + throw std::invalid_argument("invalid fused activation"); + } +} + +/** + * Replace Fully Connected with Batched MatMul + * + * BEFORE + * + * [Node1] [Node2] + * | | + * [transpose]? [transpose]? 
+ * \ / + * [FullyConnected] + * + * AFTER + * + * [Node1] [Node2] + * \ / + * [BatchMatMul] [BiasValue]? + * \ / + * [Add]? + * | + * [Activation]? + * + * Nodes with "?" denote optional elements + */ +bool replace_fc_with_matmul(luci::CircleFullyConnected *fc) +{ + luci::CircleNode *x = nullptr; + luci::CircleNode *y = nullptr; + luci::CircleNode *b = nullptr; + luci::CircleTranspose *ty = nullptr; + luci::CircleTranspose *tx = nullptr; + bool adj_x = false; + bool adj_y = true; + + if (dynamic_cast<luci::CircleConst *>(fc->weights())) + return false; // NonConst + + if ((ty = dynamic_cast<luci::CircleTranspose *>(fc->weights()))) // is y a transpose? + { + adj_y = false; + if (dynamic_cast<luci::CircleConst *>(ty->a())) + return false; + else + y = loco::must_cast<luci::CircleNode *>(ty->a()); + } + else + { // y is not transpose and not const + y = loco::must_cast<luci::CircleNode *>(fc->weights()); + } + if ((tx = dynamic_cast<luci::CircleTranspose *>(fc->input()))) + { + adj_x = true; + x = loco::must_cast<luci::CircleNode *>(tx->a()); + } + else + { + x = loco::must_cast<luci::CircleNode *>(fc->input()); + } + + b = loco::must_cast<luci::CircleNode *>(fc->bias()); + + if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32 || + b->dtype() != loco::DataType::FLOAT32) + return false; + + auto name = fc->name(); + assert(name.length() > 0); + + auto matmul = fc->graph()->nodes()->create<luci::CircleBatchMatMul>(); + matmul->x(x); + matmul->y(y); + matmul->adj_x(adj_x); + matmul->adj_y(adj_y); + matmul->name(name); + matmul->dtype(fc->dtype()); + + luci::add_origin(matmul, luci::get_origin(fc)); + + auto all_zero = [](const luci::CircleConst *c) { + bool ac = true; + for (uint32_t i = 0; i < c->size<loco::DataType::FLOAT32>() && ac; i++) + { + ac &= c->at<loco::DataType::FLOAT32>(i) == 0.0f; + } + return ac; + }; + + auto bc = dynamic_cast<luci::CircleConst *>(b); + if ((nullptr != bc) && !all_zero(bc)) + { + auto bias_add = 
fc->graph()->nodes()->create<luci::CircleAdd>(); + bias_add->x(matmul); + bias_add->y(b); + bias_add->name(fc->name() + "/bias_add"); + bias_add->dtype(fc->dtype()); + add_origin(bias_add, get_origin(fc)); + bias_add->fusedActivationFunction(fc->fusedActivationFunction()); + loco::replace(fc).with(bias_add); + } + else + { + auto n = fromActivation(matmul, fc->fusedActivationFunction()); + add_origin(n, luci::get_origin(fc)); + n->name(fc->name() + "fusedActivation"); + n->dtype(fc->dtype()); + loco::replace(fc).with(n); + } + + return true; +} +} // namespace + +namespace luci +{ + +bool ReplaceNonConstFCWithBatchMatMulPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto fc = dynamic_cast<luci::CircleFullyConnected *>(node)) + { + if (replace_fc_with_matmul(fc)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp new file mode 100644 index 000000000..7606a6125 --- /dev/null +++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h" + +#include <luci/test/TestIOGraph.h> +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template <typename T> +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector<uint32_t> &shape, + const std::vector<T> &values) +{ + auto node = g->nodes()->create<luci::CircleConst>(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size<DT>(size); \ + for (uint32_t i = 0; i < values.size(); ++i) \ + node->at<DT>(i) = values[i]; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} + +/** + * Simple graph for test + * + * BEFORE + * + * [IFM1] [IFM2] [BIAS] + * \ | / + * [FC] + * | + * [Res] + * + * AFTER + * [IFM1] [IFM2] + * \ | + * [BatchMatMul] [BIAS] + * \ / + * [Add] + * | + * [Res] + * + */ +struct FCGraphlet +{ +public: + FCGraphlet() = default; + virtual ~FCGraphlet() = default; + + void init(loco::Graph *g, const ShapeU32 r_shape, const float bv) + { + _tr_y = g->nodes()->create<luci::CircleTranspose>(); + _tr_y->a(_y); + std::vector<int32_t> tr_val = {1, 0}; + _tr_y->perm(create_const_node(g, loco::DataType::S32, {2}, tr_val)); + + _fc = g->nodes()->create<luci::CircleFullyConnected>(); + _fc->input(_x); + _fc->weights(_tr_y); + 
_fc->fusedActivationFunction(luci::FusedActFunc::NONE); + _fc->dtype(loco::DataType::FLOAT32); + _fc->shape(r_shape); + auto l = _fc->dim(_fc->rank() - 1).value(); + std::vector<float> bias_val(l, bv); + _fc->bias(create_const_node(g, loco::DataType::FLOAT32, {l}, bias_val)); + _fc->name("fc"); + } + +public: + luci::CircleFullyConnected *fc() { return _fc; } + +protected: + luci::CircleFullyConnected *_fc = nullptr; + luci::CircleTranspose *_tr_y = nullptr; + luci::CircleInput *_x = nullptr; + luci::CircleInput *_y = nullptr; +}; + +struct FCGraph : public TestIsGraphlet<2>, public TestOGraphlet, public FCGraphlet +{ + FCGraph() = default; + virtual ~FCGraph() = default; + void init(const ShapeU32 x_shape, const ShapeU32 y_shape, const ShapeU32 r_shape, const float bv) + { + TestIsGraphlet<2>::init(g(), {x_shape, y_shape}); + TestOGraphlet::init(g(), r_shape); + _x = input(0); + _y = input(1); + FCGraphlet::init(g(), r_shape, bv); + output()->from(_fc); + } +}; + +class ReplaceNonConstFCWithBatchMatMulPassTest : public ::testing::Test +{ +public: + FCGraph g; + luci::ReplaceNonConstFCWithBatchMatMulPass pass; +}; + +} // namespace + +TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, simple_test) +{ + g.init({2, 3}, {2, 3}, {2, 2}, 0.0f); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto mm = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from()); + EXPECT_NE(nullptr, mm); +} + +TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test) +{ + g.init({2, 3}, {2, 3}, {2, 2}, 1.0f); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto mm = dynamic_cast<luci::CircleAdd *>(g.output()->from()); + EXPECT_NE(nullptr, mm); +} + +TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, wrong_op_NEG) +{ + loco::Graph g; + + auto inp = g.nodes()->create<luci::CircleInput>(); + auto relu = g.nodes()->create<luci::CircleRelu>(); + relu->features(inp); + + luci::ReplaceNonConstFCWithBatchMatMulPass pass; + auto changed = pass.run(&g); + + 
EXPECT_EQ(false, changed); +} diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp new file mode 100644 index 000000000..a65065800 --- /dev/null +++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ResolveCustomOpSplitVPass.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Profile/CircleNodeOrigin.h> +#include <luci/Service/Nodes/CircleConst.h> + +namespace +{ + +// Input node is const S64 +// Return s32 version of node +// Return nullptr if s64 value is out of range of s32 +luci::CircleConst *s64_to_s32(luci::CircleConst *node) +{ + assert(node); + assert(node->dtype() == loco::DataType::S64); + + auto cloned = luci::clone(node); + luci::add_origin(cloned, luci::get_origin(node)); + + const auto num_elems = node->size<loco::DataType::S64>(); + + cloned->dtype(loco::DataType::S32); + cloned->size<loco::DataType::S32>(num_elems); + + for (uint32_t i = 0; i < num_elems; i++) + { + int64_t val = node->at<loco::DataType::S64>(i); + if (val < std::numeric_limits<int32_t>::min() or val > std::numeric_limits<int32_t>::max()) + return nullptr; + + cloned->at<loco::DataType::S32>(i) = static_cast<int32_t>(val); + } + + return cloned; +} + +/** BEFORE + * + * [CircleNode] + * \ + * \ [size_splits] [split_dim] + * \ | / + 
* [CircleCustom(SplitV)] + * | + * [CircleCustomOut] + * | + * [CircleNode] + * + * AFTER + * + * [CircleNode] + * | \ + * | \ [size_splits] [split_dim] + * | \ | / + * | \ | / + * | \ | / + * [CircleCustom(SplitV)] [CircleSplitV] + * | | + * [CircleCustomOut] [CircleSplitVOut] + * | + * [CircleNode] + */ +bool resolve_splitv(luci::CircleCustom *node) +{ + const std::string custom_code = node->custom_code(); + const std::vector<uint8_t> custom_options = node->custom_options(); + + if (custom_code != "SplitV") + return false; + + if (node->numInputs() != 3) + return false; + + auto size_splits = dynamic_cast<luci::CircleConst *>(node->inputs(1)); + if (not size_splits) + return false; + + // Convert size_splits to S32, because luci-interpreter does not support + // S64 size_splits yet + // TODO Support S64 size_splits + if (size_splits->dtype() == loco::DataType::S64) + { + size_splits = s64_to_s32(size_splits); + if (not size_splits) + return false; + } + if (size_splits->dtype() != loco::DataType::S32) + return false; + + auto split_dim = dynamic_cast<luci::CircleConst *>(node->inputs(2)); + if (not split_dim) + return false; + + if (split_dim->dtype() == loco::DataType::S64) + { + split_dim = s64_to_s32(split_dim); + if (not split_dim) + return false; + } + if (split_dim->dtype() != loco::DataType::S32) + return false; + + if (size_splits->rank() != 1) + return false; + + const auto num_split = size_splits->dim(0).value(); + + auto split_v = node->graph()->nodes()->create<luci::CircleSplitV>(); + split_v->input(node->inputs(0)); + split_v->size_splits(size_splits); + split_v->split_dim(split_dim); + split_v->num_split(num_split); + split_v->name(node->name()); + luci::add_origin(split_v, luci::get_origin(node)); + + int32_t i = 0; + const auto succs = loco::succs(node); + for (auto succ : succs) + { + auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ); // FIX_CALLER_UNLESS + + auto split_v_out = 
node->graph()->nodes()->create<luci::CircleSplitVOut>(); + split_v_out->input(split_v); + split_v_out->name(node->name() + "_out_" + std::to_string(i)); + split_v_out->index(i++); + luci::add_origin(split_v_out, luci::get_origin(node)); + loco::replace(custom_out).with(split_v_out); + } + + return true; +} + +} // namespace + +namespace luci +{ + +bool ResolveCustomOpSplitVPass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto cop = dynamic_cast<luci::CircleCustom *>(node); + if (not cop) + continue; + + if (resolve_splitv(cop)) + changed = true; + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp new file mode 100644 index 000000000..e7738aadb --- /dev/null +++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/ResolveCustomOpSplitVPass.h" + +#include <luci/test/TestIOGraph.h> + +#include <luci/IR/CircleNodes.h> +#include <gtest/gtest.h> + +using namespace luci::test; + +namespace +{ + +/** + * graph having Custom operator SplitV + * + * [Input] [Const] [Const] + * \ | / + * [Custom(SplitV)] + * / | \ + * [CustomOut] [CustomOut] [CustomOut] + * | | | + * [Output] [Output] [Output] + */ +class SplitVGraphlet +{ +public: + SplitVGraphlet() = default; + +public: + void init(loco::Graph *g) + { + // CircleCustom(SplitV) + _splitv = g->nodes()->create<luci::CircleCustom>(3, 3); + _splitv->custom_code("SplitV"); + _splitv->shape({1, 2, 2, 192}); + _splitv->dtype(loco::DataType::FLOAT32); + _splitv->name("splitv"); + + // CircleConst + auto size_splits = g->nodes()->create<luci::CircleConst>(); + size_splits->dtype(loco::DataType::S64); + size_splits->shape({3}); + size_splits->size<loco::DataType::S64>(3); + size_splits->at<loco::DataType::S64>(0) = 32; + size_splits->at<loco::DataType::S64>(1) = 32; + size_splits->at<loco::DataType::S64>(2) = 128; + + // CircleConst + auto split_dim = g->nodes()->create<luci::CircleConst>(); + split_dim->dtype(loco::DataType::S32); + split_dim->rank(0); + split_dim->size<loco::DataType::S32>(1); + split_dim->scalar<loco::DataType::S32>() = 3; + + _splitv->inputs(1, size_splits); + _splitv->inputs(2, split_dim); + + // CircleCustomOut + _splitv_out1 = g->nodes()->create<luci::CircleCustomOut>(); + _splitv_out1->shape({1, 2, 2, 32}); + _splitv_out1->dtype(loco::DataType::FLOAT32); + _splitv_out1->index(0); + _splitv_out1->input(_splitv); + + // CircleCustomOut + _splitv_out2 = g->nodes()->create<luci::CircleCustomOut>(); + _splitv_out2->shape({1, 2, 2, 32}); + _splitv_out2->dtype(loco::DataType::FLOAT32); + _splitv_out2->index(1); + _splitv_out2->input(_splitv); + + // CircleCustomOut + _splitv_out3 = g->nodes()->create<luci::CircleCustomOut>(); + _splitv_out3->shape({1, 2, 2, 128}); + 
_splitv_out3->dtype(loco::DataType::FLOAT32); + _splitv_out3->index(2); + _splitv_out3->input(_splitv); + } + +public: + luci::CircleCustom *splitv() { return _splitv; } + +protected: + luci::CircleCustom *_splitv = nullptr; + luci::CircleCustomOut *_splitv_out1 = nullptr; + luci::CircleCustomOut *_splitv_out2 = nullptr; + luci::CircleCustomOut *_splitv_out3 = nullptr; +}; + +class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet +{ +public: + SplitVGraph() = default; + + void init(void) + { + TestIGraphlet::init(g(), {1, 2, 2, 192}); + TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}}); + SplitVGraphlet::init(g()); + + // connect graph + _splitv->inputs(0, input()); + + output(0)->from(_splitv_out1); + output(1)->from(_splitv_out2); + output(2)->from(_splitv_out3); + } +}; + +class SplitVGraphTest : public ::testing::Test +{ +public: + SplitVGraph g; + luci::ResolveCustomOpSplitVPass pass; +}; + +} // namespace + +TEST_F(SplitVGraphTest, simple_test) +{ + g.init(); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto svo_1 = dynamic_cast<luci::CircleSplitVOut *>(g.output(0)->from()); + EXPECT_NE(nullptr, svo_1); + auto svo_2 = dynamic_cast<luci::CircleSplitVOut *>(g.output(1)->from()); + EXPECT_NE(nullptr, svo_2); + auto svo_3 = dynamic_cast<luci::CircleSplitVOut *>(g.output(2)->from()); + EXPECT_NE(nullptr, svo_3); + + auto sv = dynamic_cast<luci::CircleSplitV *>(svo_1->input()); + EXPECT_NE(nullptr, sv); + sv = dynamic_cast<luci::CircleSplitV *>(svo_2->input()); + EXPECT_NE(nullptr, sv); + sv = dynamic_cast<luci::CircleSplitV *>(svo_3->input()); + EXPECT_NE(nullptr, sv); + + auto size_splits = loco::must_cast<luci::CircleConst *>(sv->size_splits()); + EXPECT_EQ(loco::DataType::S32, size_splits->dtype()); + EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(0)); + EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(1)); + EXPECT_EQ(128, size_splits->at<loco::DataType::S32>(2)); + + auto 
split_dim = loco::must_cast<luci::CircleConst *>(sv->split_dim()); + EXPECT_EQ(loco::DataType::S32, split_dim->dtype()); + EXPECT_EQ(3, split_dim->scalar<loco::DataType::S32>()); +} + +TEST_F(SplitVGraphTest, wrong_op_NEG) +{ + g.init(); + + g.splitv()->custom_code("AddV2"); + + auto ret = pass.run(g.g()); + EXPECT_EQ(false, ret); +} diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h index 442183c18..408e6b8d9 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h @@ -197,6 +197,13 @@ private: return true; } + bool visit(const luci::CircleReduceMax *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)); + RETURN_FALSE_UNLESS(is_lwq(node->input())); + return true; + } + bool visit(const luci::CircleRelu *node) { RETURN_FALSE_UNLESS(is_lwq(node)); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp index 4e1c062c0..cf86acabe 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp +++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp @@ -302,6 +302,15 @@ bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *nod } template <loco::DataType Qtype, loco::DataType Btype> +bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReduceMax *node) +{ + RETURN_FALSE_UNLESS(has_type(node, Qtype)) + RETURN_FALSE_UNLESS(has_type(node->input(), Qtype)) + RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32)) + return true; +} + +template <loco::DataType Qtype, loco::DataType Btype> bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node) { return group_has_type(node, Qtype); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h index ff1acbd6f..789d3c7cd 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeType.h +++ 
b/compiler/luci/pass/src/VerifyQuantizedNodeType.h @@ -104,6 +104,7 @@ private: bool visit(const luci::CirclePadV2 *node); bool visit(const luci::CirclePRelu *node); bool visit(const luci::CirclePow *node); + bool visit(const luci::CircleReduceMax *node); bool visit(const luci::CircleRelu *node); bool visit(const luci::CircleReshape *node); bool visit(const luci::CircleResizeBilinear *node); diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp new file mode 100644 index 000000000..72b7d60ff --- /dev/null +++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// codes under namespace sparsity referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +#include "SparsityFormatConverter.h" + +#include <oops/InternalExn.h> + +#include <cassert> + +namespace sparsity +{ + +namespace +{ + +uint64_t GetFlattenedIndex(const std::vector<int> &indices, const std::vector<int> &shape) +{ + uint64_t index = 0; + int sub_elements = 1; + for (int i = shape.size() - 1; i >= 0; i--) + { + index += indices[i] * sub_elements; + sub_elements *= shape[i]; + } + return index; +} + +std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray *int_array) +{ + std::vector<int> values; + if (!int_array) + { + return values; + } + + values.resize(int_array->size); + for (int i = 0; i < int_array->size; i++) + { + values[i] = int_array->data[i]; + } + + return values; +} + +} // namespace + +template <typename T> +FormatConverter<T>::FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity) +{ + auto traversal_order = TfLiteIntArrayToVector(sparsity.traversal_order); + auto block_map = TfLiteIntArrayToVector(sparsity.block_map); + + std::vector<TfLiteDimensionType> format(sparsity.dim_metadata_size); + std::vector<int> dense_size(sparsity.dim_metadata_size); + std::vector<std::vector<int>> segments(sparsity.dim_metadata_size); + std::vector<std::vector<int>> indices(sparsity.dim_metadata_size); + for (int i = 0; i < sparsity.dim_metadata_size; i++) + { + format[i] = sparsity.dim_metadata[i].format; + dense_size[i] = sparsity.dim_metadata[i].dense_size; + segments[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_segments); + indices[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_indices); + } + + InitSparseToDenseConverter(shape, std::move(traversal_order), std::move(format), + std::move(dense_size), 
std::move(segments), std::move(indices), + std::move(block_map)); +} + +template <typename T> +void FormatConverter<T>::InitSparseToDenseConverter( + std::vector<int> shape, std::vector<int> traversal_order, std::vector<TfLiteDimensionType> format, + std::vector<int> dense_size, std::vector<std::vector<int>> segments, + std::vector<std::vector<int>> indices, std::vector<int> block_map) +{ + dense_shape_ = std::move(shape); + traversal_order_ = std::move(traversal_order); + block_map_ = std::move(block_map); + format_ = std::move(format); + + dense_size_ = 1; + for (size_t i = 0; i < dense_shape_.size(); i++) + { + dense_size_ *= dense_shape_[i]; + } + + dim_metadata_.resize(2 * format_.size()); + for (size_t i = 0; i < format_.size(); i++) + { + if (format_[i] == kTfLiteDimDense) + { + dim_metadata_[2 * i] = {dense_size[i]}; + } + else + { + dim_metadata_[2 * i] = std::move(segments[i]); + dim_metadata_[2 * i + 1] = std::move(indices[i]); + } + } + + int original_rank = dense_shape_.size(); + int block_dim = 0; + + blocked_shape_.resize(original_rank); + block_size_.resize(block_map_.size()); + for (int i = 0; i < original_rank; i++) + { + if (block_dim < (int)block_map_.size() && block_map_[block_dim] == i) + { + if (original_rank + block_dim < (int)traversal_order_.size()) + { + int orig_dim = traversal_order_[original_rank + block_dim]; + block_size_[block_dim] = dense_size[orig_dim]; + blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim]; + block_dim++; + } + } + else + { + blocked_shape_[i] = dense_shape_[i]; + } + } +} + +template <typename T> +void FormatConverter<T>::Populate(const T *src_data, std::vector<int> indices, int level, + int prev_idx, int *src_data_ptr, T *dest_data) +{ + if (static_cast<size_t>(level) == indices.size()) + { + int orig_rank = dense_shape_.size(); + std::vector<int> orig_idx; + orig_idx.resize(orig_rank); + int i = 0; + for (; static_cast<size_t>(i) < orig_idx.size(); i++) + { + int orig_dim = traversal_order_[i]; + 
orig_idx[orig_dim] = indices[i]; + } + + for (; static_cast<size_t>(i) < indices.size(); i++) + { + const int block_idx = traversal_order_[i] - orig_rank; + const int orig_dim = block_map_[block_idx]; + orig_idx[orig_dim] = orig_idx[orig_dim] * block_size_[block_idx] + indices[i]; + } + + dest_data[GetFlattenedIndex(orig_idx, dense_shape_)] = src_data[*src_data_ptr]; + + *src_data_ptr = *src_data_ptr + 1; + return; + } + + const int metadata_idx = 2 * level; + const int shape_of_level = dim_metadata_[metadata_idx][0]; + if (format_[level] == kTfLiteDimDense) + { + for (int i = 0; i < shape_of_level; i++) + { + indices[level] = i; + Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i, src_data_ptr, + dest_data); + } + } + else if (static_cast<size_t>(prev_idx + 1) < dim_metadata_[metadata_idx].size()) + { + const auto &array_segments = dim_metadata_[metadata_idx]; + const auto &array_indices = dim_metadata_[metadata_idx + 1]; + for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1]; i++) + { + if (static_cast<size_t>(i) < array_indices.size() && + static_cast<size_t>(level) < indices.size()) + { + indices[level] = array_indices[i]; + Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data); + } + } + } +} + +template <typename T> bool FormatConverter<T>::SparseToDense(const T *src_data) +{ + data_.resize(dense_size_); + std::fill(data_.begin(), data_.end(), T(0)); + + int total_rank = traversal_order_.size(); + int src_data_ptr = 0; + std::vector<int> indices(total_rank); + Populate(src_data, indices, 0, 0, &src_data_ptr, data_.data()); + + return true; +} + +template class FormatConverter<float>; +template class FormatConverter<uint16_t>; + +} // namespace sparsity + +#include <luci/IR/SparsityParam.h> + +namespace luci +{ + +sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt) +{ + switch (dt) + { + case luci::DimensionType::DENSE: + return sparsity::TfLiteDimensionType::kTfLiteDimDense; + case 
luci::DimensionType::SPARSE_CSR: + return sparsity::TfLiteDimensionType::kTfLiteDimSparseCSR; + } + return sparsity::TfLiteDimensionType::kTfLiteDimDense; +} + +sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data) +{ + auto type = data.type(); + switch (type) + { + case luci::SparseIndexVectorType::NONE: + { + std::vector<int32_t> empty; + return makeTfLiteArray(empty); + } + case luci::SparseIndexVectorType::I32: + return makeTfLiteArray<int32_t>(*data.as_int32_vector()); + case luci::SparseIndexVectorType::U16: + return makeTfLiteArray<uint16_t>(*data.as_uint16_vector()); + case luci::SparseIndexVectorType::U8: + return makeTfLiteArray<uint8_t>(*data.as_uint8_vector()); + default: + INTERNAL_EXN_V("unsupported SparseIndexVectorType", oops::to_uint32(type)); + } +} + +sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp) +{ + sparsity::TfLiteSparsity tflsp; + tflsp.traversal_order = makeTfLiteArray(sp->traversal_order); + tflsp.block_map = makeTfLiteArray(sp->block_map); + tflsp.dim_metadata = makeTfLiteDimensionMetadata(sp->dim_metadata); + tflsp.dim_metadata_size = sp->dim_metadata.size(); + return tflsp; +} + +template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data) +{ + size_t cn = data.size(); + size_t sz = 1 + data.size(); + sparsity::TfLiteIntArray *sp = (sparsity::TfLiteIntArray *)(new int[sz]); + sp->size = cn; + for (size_t i = 0; i < cn; ++i) + { + sp->data[i] = data[i]; + } + return sp; +} + +sparsity::TfLiteDimensionMetadata * +makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data) +{ + size_t cn = data.size(); + sparsity::TfLiteDimensionMetadata *tfldm = new sparsity::TfLiteDimensionMetadata[cn]; + + for (size_t i = 0; i < cn; ++i) + { + tfldm[i].format = to_tflite_sparsity(data[i].format()); + tfldm[i].dense_size = data[i].dense_size(); + tfldm[i].array_segments = to_tflite_sparsity(data[i].array_segments()); + tfldm[i].array_indices = 
to_tflite_sparsity(data[i].array_indices()); + } + + return tfldm; +} + +void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp) +{ + assert(tflsp.traversal_order); + assert(tflsp.block_map); + delete[] tflsp.traversal_order; + delete[] tflsp.block_map; + + for (int i = 0; i < tflsp.dim_metadata_size; ++i) + { + assert(tflsp.dim_metadata[i].array_segments); + assert(tflsp.dim_metadata[i].array_indices); + delete[] tflsp.dim_metadata[i].array_segments; + delete[] tflsp.dim_metadata[i].array_indices; + } +} + +} // namespace luci diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.h b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h new file mode 100644 index 000000000..fcd9bbcd0 --- /dev/null +++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__ +#define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__ + +#include <cstdint> +#include <vector> + +// codes under namespace sparsity referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +namespace sparsity +{ + +// Storage format of each dimension in a sparse tensor. +typedef enum TfLiteDimensionType +{ + kTfLiteDimDense = 0, + kTfLiteDimSparseCSR, +} TfLiteDimensionType; + +// Fixed size list of integers. Used for dimensions and inputs/outputs tensor +// indices +typedef struct TfLiteIntArray +{ + int size; + int data[]; +} TfLiteIntArray; + +// Metadata to encode each dimension in a sparse tensor. +typedef struct TfLiteDimensionMetadata +{ + TfLiteDimensionType format; + int dense_size; + TfLiteIntArray *array_segments; + TfLiteIntArray *array_indices; +} TfLiteDimensionMetadata; + +// Parameters used to encode a sparse tensor. For detailed explanation of each +// field please refer to lite/schema/schema.fbs. +typedef struct TfLiteSparsity +{ + TfLiteIntArray *traversal_order; + TfLiteIntArray *block_map; + TfLiteDimensionMetadata *dim_metadata; + int dim_metadata_size; +} TfLiteSparsity; + +// A converter that keeps an internal representation of sparse tensor parameters +// and converts tensors between dense and sparse formats. +template <typename T> class FormatConverter +{ +public: + /* Creates a sparse to dense converter. + * @param shape Shape of the target dense tensor. + * @param sparsity Sparsity parameter of the sparse TfLiteTensor. 
+ */ + FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity); + + const std::vector<T> &GetData() { return data_; } + const std::vector<std::vector<int>> &GetDimMetadata() { return dim_metadata_; } + + bool SparseToDense(const T *src_data); + +private: + // Helper function for initializing this converter for sparse to dense + // conversion. + void InitSparseToDenseConverter(std::vector<int> shape, std::vector<int> traversal_order, + std::vector<TfLiteDimensionType> format, + std::vector<int> dense_size, + std::vector<std::vector<int>> segments, + std::vector<std::vector<int>> indices, + std::vector<int> block_map); + + void Populate(const T *src_data, std::vector<int> indices, int level, int prev_idx, + int *src_data_ptr, T *dest_data); + +private: + std::vector<int> dense_shape_; + std::vector<int> blocked_shape_; + size_t dense_size_; + std::vector<int> traversal_order_; + std::vector<TfLiteDimensionType> format_; + std::vector<int> block_size_; + std::vector<int> block_map_; + std::vector<std::vector<int>> dim_metadata_; + std::vector<T> data_; +}; + +extern template class FormatConverter<float>; +extern template class FormatConverter<uint16_t>; + +} // namespace sparsity + +#include <luci/IR/SparsityParam.h> + +namespace luci +{ + +sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt); +sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data); +sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp); + +template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data); +sparsity::TfLiteDimensionMetadata * +makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data); + +void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp); + +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__ diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake index e896188be..0a5e6a58b 100644 --- 
a/compiler/luci/requires.cmake +++ b/compiler/luci/requires.cmake @@ -10,4 +10,5 @@ require("oops") require("hermes") require("hermes-std") require("tflchef") +require("circlechef") require("tflite2circle") diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h index 99e4561b3..95f06db4c 100644 --- a/compiler/luci/service/src/CircleCloneNode.h +++ b/compiler/luci/service/src/CircleCloneNode.h @@ -72,6 +72,7 @@ public: CloneNodeLet(loco::Graph *graph) : _graph(graph){}; public: + luci::CircleNode *visit(const luci::CircleDensify *) final; luci::CircleNode *visit(const luci::CircleDepthToSpace *) final; luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final; luci::CircleNode *visit(const luci::CircleDequantize *) final; diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp index 9d156f3e2..a368faef4 100644 --- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp @@ -204,6 +204,7 @@ template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node) return loco::NodeShape{inputs_shape}; \ } +DECLARE_USE_SINGLE(input); DECLARE_USE_SINGLE(inputs); DECLARE_USE_SINGLE(x); DECLARE_USE_SINGLE(logits); @@ -258,10 +259,10 @@ loco::NodeShape infer_add_n(const luci::CircleAddN *node) return loco::NodeShape{shape}; } -loco::NodeShape infer_arg_max(const luci::CircleArgMax *node) +template <class CIRCLENODE> loco::NodeShape infer_arg_maxmin(const CIRCLENODE *node) { - auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); - auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>(); + auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>(); + auto dimension_shape = luci::shape_get(node->dimension()).template as<loco::TensorShape>(); int64_t select_axis = 0; { @@ -271,55 +272,19 @@ loco::NodeShape 
infer_arg_max(const luci::CircleArgMax *node) // Support S32 for now. auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension()); LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst for CircleArgMax"); + "Only support int32 CircleConst for CircleArgMax/CircleArgMin"); if (const_shape_node->rank() > 1) INTERNAL_EXN_V("Only support rank 0/1 CircleConst", oops::to_uint32(const_shape_node->rank())); - select_axis = const_shape_node->scalar<loco::DataType::S32>(); - } - assert(select_axis < input_shape.rank()); - assert(select_axis >= 0); // TODO support minus of this breaks - - // NOTE select_axis is removed - loco::TensorShape shape_output; - uint32_t rank = input_shape.rank(); - uint32_t shrink = static_cast<uint32_t>(select_axis); - assert(rank > 0); - shape_output.rank(rank - 1); - for (uint32_t r = 0, d = 0; r < rank; ++r) - { - if (r == shrink) - continue; - shape_output.dim(d++) = input_shape.dim(r); + select_axis = const_shape_node->template scalar<loco::DataType::S32>(); } - return loco::NodeShape{shape_output}; -} - -loco::NodeShape infer_arg_min(const luci::CircleArgMin *node) -{ - auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); - auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>(); - - int64_t select_axis = 0; - { - LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr"); - - // Only support node's shape() is CircleConst with S32/S64 - // Support S32 for now. 
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension()); - LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32, - "Only support int32 CircleConst for CircleArgMin"); - - if (const_shape_node->rank() > 1) - INTERNAL_EXN_V("Only support rank 0/1 CircleConst", - oops::to_uint32(const_shape_node->rank())); - select_axis = const_shape_node->scalar<loco::DataType::S32>(); - } assert(select_axis < input_shape.rank()); - assert(select_axis >= 0); // TODO support minus of this breaks + + if (select_axis < 0) + select_axis += input_shape.rank(); // NOTE select_axis is removed loco::TensorShape shape_output; @@ -1180,45 +1145,17 @@ loco::NodeShape infer_reshape(const luci::CircleReshape *node) return loco::NodeShape{output_shape}; } -loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node) +template <class CIRCLENODE> loco::NodeShape infer_resize_type(const CIRCLENODE *node) { - auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); - - if (input_shape.rank() != 4) - INTERNAL_EXN("Expected ResizeBilinear input to have rank 4"); - - auto *const_node = loco::must_cast<luci::CircleConst *>(node->size()); - - if (const_node->dtype() != loco::DataType::S32) - INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size"); - - if (const_node->rank() != 1) - INTERNAL_EXN("Expected size tensor of rank 1"); - - if (const_node->dim(0).value() != 2) - INTERNAL_EXN("Expected size tensor with shape [2]"); - - loco::TensorShape output_shape; - output_shape.rank(4); - output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(1) = const_node->at<loco::DataType::S32>(0); - output_shape.dim(2) = const_node->at<loco::DataType::S32>(1); - output_shape.dim(3) = input_shape.dim(3); - - return loco::NodeShape{output_shape}; -} - -loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node) -{ - auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>(); + auto 
input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>(); if (input_shape.rank() != 4) - INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4"); + INTERNAL_EXN("Expected input to have rank 4"); auto *const_node = loco::must_cast<luci::CircleConst *>(node->size()); if (const_node->dtype() != loco::DataType::S32) - INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size"); + INTERNAL_EXN("Only S32 datatype is supported for size"); if (const_node->rank() != 1) INTERNAL_EXN("Expected size tensor of rank 1"); @@ -1229,8 +1166,8 @@ loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNei loco::TensorShape output_shape; output_shape.rank(4); output_shape.dim(0) = input_shape.dim(0); - output_shape.dim(1) = const_node->at<loco::DataType::S32>(0); - output_shape.dim(2) = const_node->at<loco::DataType::S32>(1); + output_shape.dim(1) = const_node->template at<loco::DataType::S32>(0); + output_shape.dim(2) = const_node->template at<loco::DataType::S32>(1); output_shape.dim(3) = input_shape.dim(3); return loco::NodeShape{output_shape}; @@ -2080,9 +2017,9 @@ public: loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); } - loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); } + loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_maxmin(node); } - loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); } + loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_maxmin(node); } loco::NodeShape visit(const luci::CircleAveragePool2D *node) final { @@ -2119,6 +2056,8 @@ public: loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); } + loco::NodeShape visit(const luci::CircleDensify *node) final { return use_input(node); } + loco::NodeShape visit(const luci::CircleDepthToSpace *node) final { return infer_depth_to_space(node); @@ 
-2348,12 +2287,12 @@ public: loco::NodeShape visit(const luci::CircleResizeBilinear *node) final { - return infer_resize_bilinear(node); + return infer_resize_type(node); } loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final { - return infer_resize_nearest_neighbor(node); + return infer_resize_type(node); } loco::NodeShape visit(const luci::CircleReverseSequence *node) final diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp index 438c4a364..7616390ae 100644 --- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp @@ -102,6 +102,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT return node->dtype(); } + loco::DataType visit(const luci::CircleDensify *node) final + { + return luci::dtype_get(node->input()); + } + loco::DataType visit(const luci::CircleDepthToSpace *node) final { return luci::dtype_get(node->input()); diff --git a/compiler/luci/service/src/Nodes/CircleDensify.cpp b/compiler/luci/service/src/Nodes/CircleDensify.cpp new file mode 100644 index 000000000..a0d15b6c7 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleDensify.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "CircleCloneNode.h" + +namespace luci +{ + +luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDensify *) +{ + return _graph->nodes()->create<luci::CircleDensify>(); +} + +} // namespace luci diff --git a/compiler/luci/service/src/Nodes/CircleDensify.test.cpp b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp new file mode 100644 index 000000000..d0f32c1a2 --- /dev/null +++ b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Service/CircleNodeClone.h" + +#include <gtest/gtest.h> + +TEST(CloneNodeTest, clone_Densify) +{ + auto g = loco::make_graph(); + auto node_densify = g->nodes()->create<luci::CircleDensify>(); + + auto gc = loco::make_graph(); + auto cloned = luci::clone_node(node_densify, gc.get()); + ASSERT_NE(nullptr, cloned); + ASSERT_EQ(gc.get(), cloned->graph()); + + auto cloned_densify = dynamic_cast<luci::CircleDensify *>(cloned); + ASSERT_NE(nullptr, cloned_densify); +} diff --git a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp index c5864f938..77135cca0 100644 --- a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp +++ b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp @@ -24,16 +24,22 @@ #include <loco/IR/NodeShape.h> #include <oops/InternalExn.h> +#include <algorithm> #include <cmath> #include <cstdint> #include <limits> +// code referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/strided_slice.cc +// tensorflow/lite/kernels/internal/strided_slice_logic.h + namespace { -// This Op only supports 1-4D cases and since we use the reference 4D +// This Op only supports 1-5D cases and since we use the reference 4D // implementation, the 1-3D tensors are mapped to 4D. 
-const int kMaxDim = 4; +const int kMaxDim = 5; const loco::DataType S32 = loco::DataType::S32; @@ -42,18 +48,47 @@ using int16 = int16_t; struct StridedSliceParams { - int8 start_indices_count; + int8 start_indices_count = 0; int16 start_indices[kMaxDim]; - int8 stop_indices_count; + int8 stop_indices_count = 0; int16 stop_indices[kMaxDim]; - int8 strides_count; + int8 strides_count = 0; int16 strides[kMaxDim]; - int16 begin_mask; - int16 ellipsis_mask; - int16 end_mask; - int16 new_axis_mask; - int16 shrink_axis_mask; + int16 begin_mask = 0; + int16 ellipsis_mask = 0; + int16 end_mask = 0; + int16 new_axis_mask = 0; + int16 shrink_axis_mask = 0; +}; + +struct StridedSliceContext +{ + StridedSliceContext(const luci::CircleStridedSlice *node) + { + params.begin_mask = node->begin_mask(); + params.ellipsis_mask = node->ellipsis_mask(); + params.end_mask = node->end_mask(); + params.new_axis_mask = node->new_axis_mask(); + params.shrink_axis_mask = node->shrink_axis_mask(); + + input = loco::must_cast<luci::CircleNode *>(node->input()); + begin = loco::must_cast<luci::CircleConst *>(node->begin()); + end = loco::must_cast<luci::CircleConst *>(node->end()); + strides = loco::must_cast<luci::CircleConst *>(node->strides()); + + loco::TensorShape input_shape = luci::shape_get(input).as<loco::TensorShape>(); + input_dims = input_shape.rank(); + } + StridedSliceParams params; + luci::CircleNode *input = nullptr; + luci::CircleConst *begin = nullptr; + luci::CircleConst *end = nullptr; + luci::CircleConst *strides = nullptr; + + // Equivalent input shape after adding axis according to new_axis_mask. + loco::TensorShape effective_input_shape; + uint32_t input_dims = 0; }; // Use until std::clamp() is available from C++17. @@ -70,8 +105,8 @@ inline int Clamp(const int32_t v, const int32_t lo, const int32_t hi) // Return the index for the first element along that axis. 
This index will be a // positive integer between [0, axis_size - 1] that can be used to index // directly into the data. -inline int StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, - uint32_t axis) +inline int32_t StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, + uint32_t axis) { const auto begin_mask = params.begin_mask; const auto *start_indices = params.start_indices; @@ -108,7 +143,16 @@ inline int StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShap } // Clamping - start = Clamp(start, 0, axis_size - 1); + if (strides[axis] > 0) + { + // Forward iteration + start = Clamp(start, 0, axis_size); + } + else + { + // Backward iteration + start = Clamp(start, -1, axis_size - 1); + } return start; } @@ -118,14 +162,14 @@ inline int StartForAxis(const StridedSliceParams ¶ms, const loco::TensorShap // element. ie. So if you were iterating through all elements of a 1D array of // size 4, this function would return 4 as the stop, because it is one past the // "real" indices of 0, 1, 2 & 3. -inline int StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, - int axis, int start_for_axis) +inline int32_t StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape &input_shape, + int32_t axis, int32_t start_for_axis) { const auto end_mask = params.end_mask; const auto shrink_axis_mask = params.shrink_axis_mask; const auto *stop_indices = params.stop_indices; const auto *strides = params.strides; - const int axis_size = static_cast<int32_t>(input_shape.dim(axis).value()); + const int32_t axis_size = static_cast<int32_t>(input_shape.dim(axis).value()); if (axis_size == 0) { return 0; @@ -141,7 +185,7 @@ inline int StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape // already been adjusted for negative indices. 
if (shrink_axis) { - stop = start_for_axis + 1; + return start_for_axis + 1; } // end_mask override @@ -183,37 +227,125 @@ inline int StopForAxis(const StridedSliceParams ¶ms, const loco::TensorShape return stop; } -StridedSliceParams BuildStridedSliceParams(const luci::CircleStridedSlice *node) +StridedSliceParams BuildStridedSliceParams(StridedSliceContext *op_context) { StridedSliceParams op_params; - if (kMaxDim < node->rank()) + // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks + // are processed here to update begin_mask, end_mask and the index range. + op_params.begin_mask = 0; + op_params.ellipsis_mask = 0; + op_params.end_mask = 0; + op_params.new_axis_mask = 0; + op_params.shrink_axis_mask = 0; + + // Count indexes where the new_axis_mask is set but the ellipsis_mask is not. + loco::TensorShape begin_shape = luci::shape_get(op_context->begin).as<loco::TensorShape>(); + const uint32_t begin_count = begin_shape.dim(0).value(); + uint32_t num_add_axis = 0; + for (uint32_t i = 0; i < begin_count; ++i) { - INTERNAL_EXN_V("Cannot support StridedSlice rank > ", kMaxDim); + if (!((1 << i) & op_context->params.ellipsis_mask) && + ((1 << i) & op_context->params.new_axis_mask)) + { + num_add_axis++; + } } - auto begin_node = loco::must_cast<luci::CircleConst *>(node->begin()); - auto end_node = loco::must_cast<luci::CircleConst *>(node->end()); - auto strides_node = loco::must_cast<luci::CircleConst *>(node->strides()); + // Calculate the dims of input after adding new axises. + const uint32_t effective_dims = op_context->input_dims + num_add_axis; + + // If begin, end and strides are not fully provided, it means Ellipsis should + // be expanded to multiple dimensions (Ex: for spec [Ellipsis, 2] on a 3D + // input, the Ellipsis should be applied for the first 2 dimensions). Besides, + // If the new_axis_mask and the ellipsis_mask are set at the same index, the + // new_axis_mask will have no effect. 
+ int32_t effective_ellipsis_mask = 0, effective_new_axis_mask = 0; + uint32_t ellipsis_start_idx = effective_dims, expanded_ellipsis = 0; + for (uint32_t i = 0; i < effective_dims;) + { + if ((1 << i) & op_context->params.ellipsis_mask) + { + ellipsis_start_idx = i; + uint32_t ellipsis_end_idx = + std::max(i + 1, std::min(i + 1 + num_add_axis + op_context->input_dims - begin_count, + effective_dims)); + expanded_ellipsis = ellipsis_end_idx - ellipsis_start_idx - 1; + + // Set bit for effective_ellipsis_mask. + for (; i < ellipsis_end_idx; ++i) + { + effective_ellipsis_mask |= (1 << i); + } + continue; + } - uint32_t dims_count = begin_node->size<S32>(); + if ((1 << (i - expanded_ellipsis)) & op_context->params.new_axis_mask) + { + effective_new_axis_mask |= (1 << i); + } + ++i; + } - op_params.start_indices_count = dims_count; - op_params.stop_indices_count = dims_count; - op_params.strides_count = dims_count; + // Calculate effective_input_shape and its corresponding begin, end, strides. + loco::TensorShape input_shape = luci::shape_get(op_context->input).as<loco::TensorShape>(); + uint32_t added_ellipsis = 0, added_axises = 0; + op_context->effective_input_shape.rank(effective_dims); - for (uint32_t i = 0; i < dims_count; ++i) + for (uint32_t i = 0; i < effective_dims; ++i) { - op_params.start_indices[i] = begin_node->at<S32>(i); - op_params.stop_indices[i] = end_node->at<S32>(i); - op_params.strides[i] = strides_node->at<S32>(i); + if ((1 << i) & effective_ellipsis_mask) + { + // If ellipsis_mask, set the begin_mask and end_mask at that index. + added_ellipsis = std::max(0u, i - ellipsis_start_idx); + op_params.begin_mask |= (1 << i); + op_params.end_mask |= (1 << i); + op_params.strides[i] = 1; + op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises); + } + else if ((1 << i) & effective_new_axis_mask) + { + // If new_axis_mask is set, it is equivalent to adding a new dim of 1 to + // input tensor. 
Store added shape to effective_input_shape. + op_params.start_indices[i] = 0; + op_params.stop_indices[i] = 1; + op_params.strides[i] = 1; + op_context->effective_input_shape.dim(i) = loco::Dimension(1); + added_axises++; + } + else if (i >= begin_count + expanded_ellipsis) + { + op_params.start_indices[i] = 0; + op_params.stop_indices[i] = 0; + op_params.strides[i] = 1; + op_params.begin_mask |= (1 << i); + op_params.end_mask |= (1 << i); + op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises); + } + else + { + const uint32_t orig_idx = i - added_ellipsis; + op_params.start_indices[i] = op_context->begin->at<S32>(orig_idx); + op_params.stop_indices[i] = op_context->end->at<S32>(orig_idx); + op_params.strides[i] = op_context->strides->at<S32>(orig_idx); + if (op_context->params.begin_mask & (1 << orig_idx)) + { + op_params.begin_mask |= (1 << i); + } + if (op_context->params.end_mask & (1 << orig_idx)) + { + op_params.end_mask |= (1 << i); + } + if (op_context->params.shrink_axis_mask & (1 << orig_idx)) + { + op_params.shrink_axis_mask |= (1 << i); + } + op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises); + } } - - op_params.begin_mask = node->begin_mask(); - op_params.ellipsis_mask = 0; - op_params.end_mask = node->end_mask(); - op_params.new_axis_mask = 0; - op_params.shrink_axis_mask = node->shrink_axis_mask(); + op_params.start_indices_count = effective_dims; + op_params.stop_indices_count = effective_dims; + op_params.strides_count = effective_dims; return op_params; } @@ -241,55 +373,54 @@ loco::TensorShape infer_output_shape(const CircleStridedSlice *node) LUCI_ASSERT(end_node->dtype() == S32, "Only support S32 for end_node"); LUCI_ASSERT(strides_node->dtype() == S32, "Only support S32 for strides_node"); - assert(node->ellipsis_mask() == 0); - assert(node->new_axis_mask() == 0); + LUCI_ASSERT(begin_node->rank() == 1, "Only support rank 1 for begin_node"); + LUCI_ASSERT(end_node->rank() == 1, "Only support 
rank 1 for end_node"); + LUCI_ASSERT(strides_node->rank() == 1, "Only support rank 1 for strides_node"); - auto op_params = BuildStridedSliceParams(node); loco::TensorShape input_shape = luci::shape_get(input_node).as<loco::TensorShape>(); - uint32_t num_input_axes = input_shape.rank(); - assert(begin_node->size<S32>() <= num_input_axes); - assert(end_node->size<S32>() <= num_input_axes); - assert(strides_node->size<S32>() <= num_input_axes); - for (uint32_t i = 0; i < strides_node->size<S32>(); i++) - { - LUCI_ASSERT(strides_node->at<S32>(i) != 0, "Stride value has to be non-zero"); - } + assert(begin_node->size<S32>() <= input_shape.rank()); + assert(end_node->size<S32>() <= input_shape.rank()); + assert(strides_node->size<S32>() <= input_shape.rank()); - uint32_t shape_size = 0; - std::array<int32_t, 16> output_shape_data; + StridedSliceContext op_context(node); + auto op_params = BuildStridedSliceParams(&op_context); + auto effective_input_shape = op_context.effective_input_shape; + std::vector<int32_t> output_shape_vector; - for (uint32_t idx = 0; idx < num_input_axes; ++idx) + for (int32_t idx = effective_input_shape.rank() - 1; idx >= 0; --idx) { - int32_t begin = StartForAxis(op_params, input_shape, idx); - int32_t end = StopForAxis(op_params, input_shape, idx, begin); - if (end < 0) - end = input_shape.dim(idx).value() + end + 1; + int32_t stride = op_params.strides[idx]; + LUCI_ASSERT(stride != 0, "stride value has to be non-zero"); - // This is valid for both positive and negative strides - int32_t stride = strides_node->at<S32>(idx); - int32_t dim_shape = std::ceil(static_cast<float>(end - begin) / stride); - assert(dim_shape > 0); + int32_t begin = StartForAxis(op_params, effective_input_shape, idx); + int32_t end = StopForAxis(op_params, effective_input_shape, idx, begin); // When shrinking an axis, the end position does not matter (and can be // incorrect when negative indexing is used, see Issue #19260). 
Always use // begin + 1 to generate a length 1 slice, since begin has - // already been adjusted for negative indices by StartForAxis. - const bool shrink_axis = node->shrink_axis_mask() & (1 << idx); + // already been adjusted for negative indices by GetBeginValueAtIndex. + const bool shrink_axis = op_params.shrink_axis_mask & (1 << idx); if (shrink_axis) { - assert(dim_shape == 1); + end = begin + 1; } - else + + // This is valid for both positive and negative strides + int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride)); + dim_shape = dim_shape < 0 ? 0 : dim_shape; + if (!shrink_axis) { - output_shape_data[shape_size++] = dim_shape; + output_shape_vector.push_back(dim_shape); } } + auto shape_size = output_shape_vector.size(); output_shape.rank(shape_size); for (uint32_t idx = 0; idx < shape_size; ++idx) { - output_shape.dim(idx) = output_shape_data[idx]; + // reverse copy + output_shape.dim(idx) = output_shape_vector.at(shape_size - 1u - idx); } return output_shape; diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst index 94e723f21..09a25ff08 100644 --- a/compiler/luci/tests/test.lst +++ b/compiler/luci/tests/test.lst @@ -39,6 +39,7 @@ addread(Conv2D_003) addread(Conv2D_U8_000) addread(Conv2D_U8_001) addread(Cos_000) +addread(Densify_000) addread(DepthToSpace_000) addread(DepthwiseConv2D_000) addread(DepthwiseConv2D_U8_000) @@ -265,6 +266,7 @@ addwrite(Conv2D_003) addwrite(Conv2D_U8_000) addwrite(Conv2D_U8_001) addwrite(Cos_000) +addwrite(Densify_000) addwrite(DepthToSpace_000) addwrite(DepthwiseConv2D_000) addwrite(DepthwiseConv2D_U8_000) diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h index d3ffc23e5..7a1ba2b2f 100644 --- a/compiler/mio-circle04/include/mio_circle/Helper.h +++ b/compiler/mio-circle04/include/mio_circle/Helper.h @@ -19,6 +19,8 @@ #include <mio/circle/schema_generated.h> +#include <vector> + namespace mio { namespace circle @@ -31,6 
+33,21 @@ std::string opcode_name(const ::circle::OperatorCode *opcode); const char *tensor_type(const ::circle::Tensor *tensor); const char *tensor_name(const ::circle::Tensor *tensor); +template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array) +{ + if (flat_array == nullptr) + { + throw std::runtime_error("flat array is nullptr"); + } + + std::vector<T> ret(flat_array->Length()); + for (uint32_t i = 0; i < flat_array->Length(); i++) + { + ret[i] = flat_array->Get(i); + } + return ret; +} + } // namespace circle } // namespace mio diff --git a/compiler/circledump/src/Read.h b/compiler/mio-circle04/include/mio_circle/Reader.h index 05b0e5072..630646732 100644 --- a/compiler/circledump/src/Read.h +++ b/compiler/mio-circle04/include/mio_circle/Reader.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CIRCLEREAD_READ_H__ -#define __CIRCLEREAD_READ_H__ +#ifndef __MIO_CIRCLE04_READER_H__ +#define __MIO_CIRCLE04_READER_H__ #include <mio/circle/schema_generated.h> @@ -23,23 +23,14 @@ #include <string> #include <vector> -namespace circleread -{ +// NOTE Reader class originated from circledump and for circle-tensordump +// where this class has more work to be done for stability +// as the tools are for developers not customores. 
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array) +namespace mio +{ +namespace circle { - if (flat_array == nullptr) - { - throw std::runtime_error("flat array is nullptr"); - } - - std::vector<T> ret(flat_array->Length()); - for (uint32_t i = 0; i < flat_array->Length(); i++) - { - ret[i] = flat_array->Get(i); - } - return ret; -} /** * @brief Loads Circle file and provides helpers to access attributes @@ -47,36 +38,39 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T class Reader { private: - using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>; - using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>; - using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>; - using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>; - using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>; - using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>; + using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SubGraph>>; + using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Buffer>>; + using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Tensor>>; + using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Operator>>; + using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Metadata>>; + using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SignatureDef>>; public: - Reader(const circle::Model *model); + Reader(const ::circle::Model *model); Reader() = delete; public: uint32_t version() const { return _version; } - const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; } + const std::vector<const ::circle::OperatorCode *> &opcodes() { return _op_codes; } const CircleBuffers_t 
*buffers() { return _buffers; } const CircleTensors_t *tensors() { return _tensors; } const CircleOperators_t *operators() { return _operators; } const std::vector<int32_t> &inputs() const { return _inputs; } const std::vector<int32_t> &outputs() const { return _outputs; } - const circle::DataFormat &data_format() const { return _data_format; } + const ::circle::DataFormat &data_format() const { return _data_format; } const CircleMetadata_t *metadata() const { return _metadata; } const CircleSignatureDef_t *signature_defs() const { return _signature_defs; } uint32_t num_subgraph() const { return _subgraphs->Length(); } size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); - circle::BuiltinOperator builtin_code(const circle::Operator *op) const; - std::string opcode_name(const circle::Operator *op) const; + ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const; + std::string opcode_name(const ::circle::Operator *op) const; + std::vector<int32_t> outputs(const ::circle::Operator *op) const; + std::string tensor_name(const ::circle::Tensor *tensor) const; + std::string tensor_dtype(const ::circle::Tensor *tensor) const; public: bool select_subgraph(uint32_t subgraph); @@ -95,12 +89,13 @@ private: uint32_t _subgraph_index = 0; std::string _subgraph_name; - std::vector<const circle::OperatorCode *> _op_codes; + std::vector<const ::circle::OperatorCode *> _op_codes; std::vector<int32_t> _inputs; std::vector<int32_t> _outputs; - circle::DataFormat _data_format = circle::DataFormat::DataFormat_CHANNELS_FIRST; + ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST; }; -} // namespace circleread +} // namespace circle +} // namespace mio -#endif // __CIRCLEREAD_READ_H__ +#endif // __MIO_CIRCLE04_READER_H__ diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/mio-circle04/src/Reader.cpp index 0e2865254..880ffaec8 100644 --- a/compiler/circle-inspect/src/Reader.cpp +++ 
b/compiler/mio-circle04/src/Reader.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +14,29 @@ * limitations under the License. */ -#include "Reader.h" - -#include <mio_circle/Helper.h> +#include "mio_circle/Reader.h" +#include "mio_circle/Helper.h" #include <sstream> #include <string> -namespace circleinspect +namespace mio +{ +namespace circle { -Reader::Reader(const circle::Model *model) +Reader::Reader(const ::circle::Model *model) { + if (model == nullptr) + { + throw std::runtime_error("Invalid model"); + } + + _version = model->version(); _subgraphs = model->subgraphs(); _buffers = model->buffers(); + _metadata = model->metadata(); + _signature_defs = model->signature_defs(); auto opcodes = model->operator_codes(); for (const ::circle::OperatorCode *opcode : *opcodes) @@ -64,20 +73,20 @@ size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) return 0; } -circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const +::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const { uint32_t index = op->opcode_index(); assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); + const ::circle::OperatorCode *opcode = _op_codes.at(index); return mio::circle::builtin_code_neutral(opcode); } -std::string Reader::opcode_name(const circle::Operator *op) const +std::string Reader::opcode_name(const ::circle::Operator *op) const { uint32_t index = op->opcode_index(); assert(index < _op_codes.size()); - const circle::OperatorCode *opcode = _op_codes.at(index); + const ::circle::OperatorCode *opcode = _op_codes.at(index); if (!mio::circle::is_valid(opcode)) { @@ -89,18 +98,24 @@ std::string Reader::opcode_name(const 
circle::Operator *op) const return mio::circle::opcode_name(opcode); } -std::string Reader::tensor_name(const circle::Tensor *tensor) const +std::vector<int32_t> Reader::outputs(const ::circle::Operator *op) const +{ + return as_index_vector(op->outputs()); +} + +std::string Reader::tensor_name(const ::circle::Tensor *tensor) const { return mio::circle::tensor_name(tensor); } -std::string Reader::tensor_dtype(const circle::Tensor *tensor) const +std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const { return mio::circle::tensor_type(tensor); } bool Reader::select_subgraph(uint32_t sgindex) { + _subgraph_index = sgindex; _tensors = nullptr; _operators = nullptr; @@ -113,10 +128,14 @@ bool Reader::select_subgraph(uint32_t sgindex) return false; } - const circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; + const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; + + auto name = subgraph->name(); + _subgraph_name = name ? name->c_str() : "(noname)"; _tensors = subgraph->tensors(); _operators = subgraph->operators(); + _data_format = subgraph->data_format(); _inputs = as_index_vector(subgraph->inputs()); _outputs = as_index_vector(subgraph->outputs()); @@ -124,4 +143,5 @@ bool Reader::select_subgraph(uint32_t sgindex) return true; } -} // namespace circleinspect +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle04/src/Reader.test.cpp b/compiler/mio-circle04/src/Reader.test.cpp new file mode 100644 index 000000000..104454a62 --- /dev/null +++ b/compiler/mio-circle04/src/Reader.test.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_circle/Reader.h" + +#include <flatbuffers/flatbuffers.h> +#include <gtest/gtest.h> + +class mio_circle04_reader_test : public ::testing::Test +{ +protected: + void initialization_emty(void) + { + _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec); + circle::FinishModelBuffer(_fbb, _model); + } + + const circle::Model *circleModel(void) + { + auto ptr = _fbb.GetBufferPointer(); + return circle::GetModel(ptr); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + flatbuffers::Offset<circle::Model> _model; + std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec; +}; + +TEST_F(mio_circle04_reader_test, null_Model_NEG) +{ + EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error); +} + +TEST_F(mio_circle04_reader_test, empty_Model) +{ + initialization_emty(); + + const circle::Model *model = circleModel(); + EXPECT_NE(nullptr, model); + + mio::circle::Reader reader(model); + + SUCCEED(); +} + +// TODO add more tests diff --git a/compiler/mio-tflite/README.md b/compiler/mio-tflite/README.md index 187b1a5c6..c717ab877 100644 --- a/compiler/mio-tflite/README.md +++ b/compiler/mio-tflite/README.md @@ -1,3 +1,5 @@ # mio-tflite _mio-tflite_ provides a library to access TensorFlow lite model files + +NOTE: _mio-tflite_ is currently obsolete diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md index 970569b47..86d2998ed 100644 --- a/compiler/mio-tflite260/README.md +++ b/compiler/mio-tflite260/README.md @@ -1,3 +1,5 @@ # mio-tflite260 _mio-tflite260_ provides a library to 
access TensorFlow lite model files with V2.6.0. + +NOTE: _mio-tflite260_ is currently obsolete diff --git a/compiler/mir/include/mir/Graph.h b/compiler/mir/include/mir/Graph.h index bf94cfb14..37bfdb361 100644 --- a/compiler/mir/include/mir/Graph.h +++ b/compiler/mir/include/mir/Graph.h @@ -103,6 +103,10 @@ private: /** * @brief Returns nodes of the graph sorted topologically. + * @note Sorting order priority + * 1) Graph input node (input index order) + * 2) Constant node (unordered - cannot predict order) + * 3) Ready node (unordered - cannot predict order) */ std::vector<Operation *> getSortedNodes(Graph *graph); diff --git a/compiler/mir/src/Graph.cpp b/compiler/mir/src/Graph.cpp index 04b005de4..05d6dc9bd 100644 --- a/compiler/mir/src/Graph.cpp +++ b/compiler/mir/src/Graph.cpp @@ -44,9 +44,16 @@ std::vector<Operation *> getSortedNodes(Graph *graph) std::deque<Operation *> ready_nodes; std::unordered_map<Operation *, std::size_t> num_visited_input_edges; + // Use input vector first to maintain correct input order + for (Operation *op : graph->getInputs()) + { + ready_nodes.push_back(op); + } + for (Operation *op : graph->getNodes()) { - if (op->getNumInputs() == 0) + // Skip already pushed input node + if ((op->getNumInputs() == 0) && (op->getType() != Operation::Type::input)) { ready_nodes.push_back(op); } diff --git a/compiler/mir2loco/src/mir2loco.test.cpp b/compiler/mir2loco/src/mir2loco.test.cpp index 92ab99488..244c92aa8 100644 --- a/compiler/mir2loco/src/mir2loco.test.cpp +++ b/compiler/mir2loco/src/mir2loco.test.cpp @@ -383,28 +383,49 @@ TEST_F(TestTransformer_mir2loco, Conv2D_Test) auto loco_graph = transformer.transform(&mir_graph); loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0)); - loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1)); - loco::FeatureEncode *encode_node = - dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2)); - loco::FilterEncode *filter_node = 
dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3)); - loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4)); - loco::FeatureDecode *decode_node = - dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5)); - loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6)); - ASSERT_NE(pull_node, nullptr); + + // ConstGen: Only one ConstGen node + // We can convince that this node is input of FilterEncode because this is only ConstGen node + loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1)); ASSERT_NE(const_node, nullptr); - ASSERT_NE(filter_node, nullptr); + + // FeatureEncode + auto pull_uses = loco::succs(pull_node); + ASSERT_EQ(pull_uses.size(), 1); + loco::FeatureEncode *encode_node = dynamic_cast<loco::FeatureEncode *>(*pull_uses.begin()); ASSERT_NE(encode_node, nullptr); - ASSERT_NE(conv_node, nullptr); - ASSERT_NE(decode_node, nullptr); - ASSERT_NE(push_node, nullptr); ASSERT_EQ(encode_node->input(), pull_node); - ASSERT_EQ(filter_node->input(), const_node); + + // Conv2D + auto encode_uses = loco::succs(encode_node); + ASSERT_EQ(encode_uses.size(), 1); + loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(*encode_uses.begin()); + ASSERT_NE(conv_node, nullptr); ASSERT_EQ(conv_node->ifm(), encode_node); + + // FilterEncode + auto const_uses = loco::succs(const_node); + ASSERT_EQ(const_uses.size(), 1); + loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(*const_uses.begin()); + ASSERT_NE(filter_node, nullptr); + ASSERT_EQ(filter_node->input(), const_node); ASSERT_EQ(conv_node->ker(), filter_node); + + // FeatureDecode + auto conv_uses = loco::succs(conv_node); + ASSERT_EQ(conv_uses.size(), 1); + loco::FeatureDecode *decode_node = dynamic_cast<loco::FeatureDecode *>(*conv_uses.begin()); + ASSERT_NE(decode_node, nullptr); ASSERT_EQ(decode_node->input(), conv_node); + + // Push + auto decode_uses = loco::succs(decode_node); + 
ASSERT_EQ(decode_uses.size(), 1); + loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin()); + ASSERT_NE(push_node, nullptr); ASSERT_EQ(push_node->from(), decode_node); + // Check params ASSERT_EQ(conv_node->pad()->top(), 5); ASSERT_EQ(conv_node->pad()->left(), 9); diff --git a/compiler/moco/import/src/Importer.cpp b/compiler/moco/import/src/Importer.cpp index 333f0f6a9..0659fd165 100644 --- a/compiler/moco/import/src/Importer.cpp +++ b/compiler/moco/import/src/Importer.cpp @@ -190,7 +190,7 @@ std::unique_ptr<loco::Graph> Importer::import(const ModelSignature &signature, convert_graph(*source_ptr, signature, tf_graph_def, graph.get()); - return std::move(graph); + return graph; } } // namespace moco diff --git a/compiler/moco/lang/src/IR/TFNode.cpp b/compiler/moco/lang/src/IR/TFNode.cpp index 55c0e0c64..b59a505b5 100644 --- a/compiler/moco/lang/src/IR/TFNode.cpp +++ b/compiler/moco/lang/src/IR/TFNode.cpp @@ -17,6 +17,7 @@ #include "moco/IR/TFNode.h" #include "moco/IR/TFDialect.h" +#include <limits> #include <memory> #include <cassert> diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt index 8732340ae..90e989a00 100644 --- a/compiler/one-cmds/CMakeLists.txt +++ b/compiler/one-cmds/CMakeLists.txt @@ -8,7 +8,9 @@ set(ONE_COMMAND_FILES one-optimize one-quantize one-pack + one-partition one-profile + one-infer one-codegen one-prepare-venv onecc @@ -74,7 +76,11 @@ endforeach(ONE_UTILITY) # make python directory set(ONE_PYTHON_FILES constant.py - make_cmd.py) + make_cmd.py + CfgRunner.py + OptionBuilder.py + TopologicalSortHelper.py + WorkflowRunner.py) foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES}) diff --git a/compiler/one-cmds/dummy-driver/CMakeLists.txt b/compiler/one-cmds/dummy-driver/CMakeLists.txt index 690a60776..2552a02db 100644 --- a/compiler/one-cmds/dummy-driver/CMakeLists.txt +++ b/compiler/one-cmds/dummy-driver/CMakeLists.txt @@ -1,16 +1,25 @@ # dummy driver for interface test set(DUMMY_DRIVER_SRC 
src/dummy-compile.cpp) set(HELP_DRIVER_SRC src/help-compile.cpp) +set(DUMMY_INFER_SRC src/dummy-infer.cpp) +set(DUMMY_INFER_V2_SRC src/dummy-inferV2.cpp) +set(HELP_INFER_SRC src/help-infer.cpp) set(DUMMY_PROFILE_SRC src/dummy-profile.cpp) set(HELP_PROFILE_SRC src/help-profile.cpp) add_executable(dummy-compile ${DUMMY_DRIVER_SRC}) add_executable(help-compile ${HELP_DRIVER_SRC}) +add_executable(dummy-infer ${DUMMY_INFER_SRC}) +add_executable(dummy-inferV2 ${DUMMY_INFER_V2_SRC}) +add_executable(help-infer ${HELP_INFER_SRC}) add_executable(dummy-profile ${DUMMY_PROFILE_SRC}) add_executable(help-profile ${HELP_PROFILE_SRC}) set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile") set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile") +set(DUMMY_INFER "${CMAKE_CURRENT_BINARY_DIR}/dummy-infer") +set(DUMMY_INFER_V2 "${CMAKE_CURRENT_BINARY_DIR}/dummy-inferV2") +set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer") set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile") set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile") @@ -26,6 +35,24 @@ install(FILES ${HELP_DRIVER} WORLD_READ WORLD_EXECUTE DESTINATION test) +install(FILES ${DUMMY_INFER} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + +install(FILES ${DUMMY_INFER_V2} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + +install(FILES ${HELP_INFER} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION test) + install(FILES ${DUMMY_PROFILE} PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE diff --git a/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp new file mode 100644 index 000000000..60f5faefa --- /dev/null +++ b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 
2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * dummy-infer only tests its interface rather than its functionality. + * + * ./dummy-infer ${INPUT_NAME} + * dummy-infer dummy output!!! + */ + +#include <iostream> + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + std::cout << "dummy-infer dummy output!!!" << std::endl; + + return EXIT_SUCCESS; +} diff --git a/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp new file mode 100644 index 000000000..4b93c70a3 --- /dev/null +++ b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * dummy-infer only tests its interface rather than its functionality. 
+ * + * ./dummy-infer ${INPUT_NAME} + * Do inference of ${INPUT_NAME} + */ + +#include <iostream> + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + std::cout << "Do inference of " + std::string(argv[1]) << std::endl; + + return EXIT_SUCCESS; +} diff --git a/compiler/one-cmds/dummy-driver/src/help-infer.cpp b/compiler/one-cmds/dummy-driver/src/help-infer.cpp new file mode 100644 index 000000000..821d368d4 --- /dev/null +++ b/compiler/one-cmds/dummy-driver/src/help-infer.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * help-infer prints dummy help message. + * + * $ ./help-infer -h + * HELP MESSAGE!! + */ + +#include <iostream> +#include <fstream> +#include <string> + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + std::string opt_h{"-h"}; + std::string argv_1{argv[1]}; + + if (opt_h != argv_1) + return EXIT_FAILURE; + + std::cout << "HELP MESSAGE!!" 
<< std::endl; + + return EXIT_SUCCESS; +} diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index ebc165167..2352bbd7a 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -153,6 +153,7 @@ Current transformation options are - expand_broadcast_const : This will expand broadcastable constant node inputs - fold_add_v2 : This removes AddV2 operation which can be folded - fold_cast : This removes Cast operation which can be folded +- fold_densify: This removes Densify operator which can be folded - fold_dequantize : This removes Dequantize operation which can be folded - fold_dwconv : This folds Depthwise Convolution operation which can be folded - fold_gather : This removes Gather operation which can be folded @@ -205,10 +206,6 @@ Current transformation options are - transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator. - transform_min_relu_to_relu6: This will transform Minimum(6)-Relu pattern to Relu6 operator. -There are options to enable multiple options at once for convenience. 
-- O1: fuse_bcq, fuse_instnorm, resolve_customop_add, resolve_customop_batchmatmul, - resolve_customop_matmul, remove_redundant_transpose, substitute_pack_to_reshape - one-quantize ------------ diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build index 5c313b44b..4b1f98070 100644 --- a/compiler/one-cmds/one-build +++ b/compiler/one-cmds/one-build @@ -22,7 +22,6 @@ import argparse import configparser import os -import subprocess import sys import utils as _utils @@ -83,6 +82,7 @@ def _get_driver_name(driver_name): 'one-import-onnx': 'one-import-onnx', 'one-optimize': 'one-optimize', 'one-quantize': 'one-quantize', + 'one-partition': 'one-partition', 'one-pack': 'one-pack', 'one-codegen': 'one-codegen' }[driver_name] @@ -157,7 +157,8 @@ def main(): bin_dir = os.path.dirname(os.path.realpath(__file__)) import_drivers_dict = _utils._detect_one_import_drivers(bin_dir) transform_drivers = [ - 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile' + 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile', + 'one-partition' ] _verify_cfg(import_drivers_dict, config) diff --git a/compiler/one-cmds/one-build.template.cfg b/compiler/one-cmds/one-build.template.cfg index e147896ef..42960811e 100644 --- a/compiler/one-cmds/one-build.template.cfg +++ b/compiler/one-cmds/one-build.template.cfg @@ -5,6 +5,7 @@ one-import-bcq=False one-import-onnx=False one-optimize=True one-quantize=False +one-parition=False one-pack=True one-codegen=False diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen index 726538d44..86e1632e6 100644 --- a/compiler/one-cmds/one-codegen +++ b/compiler/one-cmds/one-codegen @@ -25,9 +25,7 @@ import glob import itertools import ntpath import os -import subprocess import sys -import tempfile import shutil import utils as _utils diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq index ef89a9297..c3ef0b275 100644 --- a/compiler/one-cmds/one-import-bcq +++ 
b/compiler/one-cmds/one-import-bcq @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import tempfile @@ -160,9 +159,9 @@ def _convert(args): tmpdir, os.path.splitext( os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite' - tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, - generate_bcq_metadata_output_path, - tf2tfliteV2_output_path) + tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd( + args, tf2tfliteV2_path, generate_bcq_metadata_output_path, + tf2tfliteV2_output_path) try: output_arrays_idx = tf2tfliteV2_cmd.index('--output_arrays') tf2tfliteV2_cmd[output_arrays_idx + 1] = ','.join(bcq_output_arrays) @@ -177,8 +176,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx index eaa136197..ad19c2f59 100644 --- a/compiler/one-cmds/one-import-onnx +++ b/compiler/one-cmds/one-import-onnx @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import tempfile import onnx @@ -80,6 +79,12 @@ def _get_parser(): parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators') parser.add_argument( '--unroll_lstm', action='store_true', help='Unroll LSTM operators') + parser.add_argument( + '--keep_io_order', + action='store_true', + help= + 'Ensure generated circle model preserves the I/O order of the original onnx model.' 
+ ) # save intermediate file(s) parser.add_argument( @@ -87,6 +92,12 @@ def _get_parser(): action='store_true', help='Save intermediate files to output folder') + # experimental options + parser.add_argument( + '--experimental_disable_batchmatmul_unfold', + action='store_true', + help='Experimental disable BatchMatMul unfold') + return parser @@ -124,6 +135,65 @@ def _apply_verbosity(verbosity): os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +# The index of input/output is added in front of the name. For example, +# Original input names: 'a', 'c', 'b' +# Renamed: '0001_a', '0002_c', '0003_b' +# This will preserve I/O order after import. +def _remap_io_names(onnx_model): + # gather existing name of I/O and generate new name of I/O in sort order + input_nodes = [] + output_nodes = [] + remap_inputs = [] + remap_outputs = [] + initializers = [] + # some models may have initializers as inputs. ignore them. + for initializer in onnx_model.graph.initializer: + initializers.append(initializer.name) + for idx in range(0, len(onnx_model.graph.input)): + name = onnx_model.graph.input[idx].name + if not name in initializers: + input_nodes.append(name) + remap_inputs.append(format(idx + 1, '04d') + '_' + name) + for idx in range(0, len(onnx_model.graph.output)): + name = onnx_model.graph.output[idx].name + output_nodes.append(name) + remap_outputs.append(format(idx + 1, '04d') + '_' + name) + # change names for graph input + for i in range(len(onnx_model.graph.input)): + if onnx_model.graph.input[i].name in input_nodes: + to_rename = onnx_model.graph.input[i].name + idx = input_nodes.index(to_rename) + onnx_model.graph.input[i].name = remap_inputs[idx] + # change names of all nodes in the graph + for i in range(len(onnx_model.graph.node)): + # check node.input is to change to remap_inputs or remap_outputs + for j in range(len(onnx_model.graph.node[i].input)): + if onnx_model.graph.node[i].input[j] in input_nodes: + to_rename = onnx_model.graph.node[i].input[j] + idx = 
input_nodes.index(to_rename) + onnx_model.graph.node[i].input[j] = remap_inputs[idx] + if onnx_model.graph.node[i].input[j] in output_nodes: + to_rename = onnx_model.graph.node[i].input[j] + idx = output_nodes.index(to_rename) + onnx_model.graph.node[i].input[j] = remap_outputs[idx] + # check node.output is to change to remap_inputs or remap_outputs + for j in range(len(onnx_model.graph.node[i].output)): + if onnx_model.graph.node[i].output[j] in output_nodes: + to_rename = onnx_model.graph.node[i].output[j] + idx = output_nodes.index(to_rename) + onnx_model.graph.node[i].output[j] = remap_outputs[idx] + if onnx_model.graph.node[i].output[j] in input_nodes: + to_rename = onnx_model.graph.node[i].output[j] + idx = input_nodes.index(to_rename) + onnx_model.graph.node[i].output[j] = remap_inputs[idx] + # change names for graph output + for i in range(len(onnx_model.graph.output)): + if onnx_model.graph.output[i].name in output_nodes: + to_rename = onnx_model.graph.output[i].name + idx = output_nodes.index(to_rename) + onnx_model.graph.output[i].name = remap_outputs[idx] + + def _convert(args): _apply_verbosity(args.verbose) @@ -142,6 +212,13 @@ def _convert(args): options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn') options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm') onnx_legalizer.legalize(onnx_model, options) + if _utils._is_valid_attr(args, 'keep_io_order'): + _remap_io_names(onnx_model) + if _utils._is_valid_attr(args, 'save_intermediate'): + basename = os.path.basename(getattr(args, 'input_path')) + fixed_path = os.path.join(tmpdir, + os.path.splitext(basename)[0] + '~.onnx') + onnx.save(onnx_model, fixed_path) tf_savedmodel = onnx_tf.backend.prepare(onnx_model) savedmodel_name = os.path.splitext(os.path.basename( @@ -166,8 +243,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - 
tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch index dbf1ba6d7..7f39e61bb 100644 --- a/compiler/one-cmds/one-import-pytorch +++ b/compiler/one-cmds/one-import-pytorch @@ -80,7 +80,8 @@ def _get_parser(): tf2tflite_group.add_argument('--converter_version', default='v2') parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators') - parser.add_argument('--unroll_lstm', action='store_true', help='Unroll LSTM operators') + parser.add_argument( + '--unroll_lstm', action='store_true', help='Unroll LSTM operators') # save intermediate file(s) parser.add_argument( @@ -338,8 +339,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf index 999255a34..6623fa6a4 100644 --- a/compiler/one-cmds/one-import-tf +++ b/compiler/one-cmds/one-import-tf @@ -21,8 +21,6 @@ import argparse import os -import subprocess -import sys import tempfile import onelib.make_cmd as _make_cmd @@ -152,8 +150,8 @@ def _convert(args): tmpdir, os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite' tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path, - getattr(args, 'input_path'), - tf2tfliteV2_output_path) + getattr(args, 'input_path'), + tf2tfliteV2_output_path) f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode()) @@ -163,8 +161,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = 
os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - tf2tfliteV2_output_path, - getattr(args, 'output_path')) + tf2tfliteV2_output_path, + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite index 2d756bff6..3d96b117f 100644 --- a/compiler/one-cmds/one-import-tflite +++ b/compiler/one-cmds/one-import-tflite @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import onelib.make_cmd as _make_cmd @@ -83,8 +82,8 @@ def _convert(args): # make a command to convert from tflite to circle tflite2circle_path = os.path.join(dir_path, 'tflite2circle') tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path, - getattr(args, 'input_path'), - getattr(args, 'output_path')) + getattr(args, 'input_path'), + getattr(args, 'output_path')) f.write((' '.join(tflite2circle_cmd) + '\n').encode()) diff --git a/compiler/one-cmds/one-infer b/compiler/one-cmds/one-infer new file mode 100644 index 000000000..c7fcd8afd --- /dev/null +++ b/compiler/one-cmds/one-infer @@ -0,0 +1,224 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # ''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import copy +import glob +import itertools +import ntpath +import os +import sys + +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +def _get_backends_list(): + """ + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + The list where `one-infer` finds its backends + - `bin` folder where `one-infer` exists + - `backends` folder + + NOTE If there are backends of the same name in different places, + the closer to the top in the list, the higher the priority. + """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + backend_set = set() + + # bin folder + files = [f for f in glob.glob(dir_path + '/*-infer')] + # backends folder + files += [f for f in glob.glob(dir_path + '/../backends/**/*-infer', recursive=True)] + # TODO find backends in `$PATH` + + backends_list = [] + for cand in files: + base = ntpath.basename(cand) + if (not base in backend_set) and os.path.isfile(cand) and os.access( + cand, os.X_OK): + backend_set.add(base) + backends_list.append(cand) + + return backends_list + + +def _search_backend_driver(driver): + """ + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + The list where `one-infer` finds its backend driver + - `bin` folder where `one-infer` exists + - `backends/**/bin/` folder + + NOTE If there are drivers of the same name in different places, + the closer to the top in the list, the higher the priority. 
+ """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + + # CASE 1: one/bin/{driver} is found + driver_path = dir_path + '/' + driver + if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK): + return driver_path + + # CASE 2: one/backends/**/bin/{driver} is found + for driver_path in glob.glob( + dir_path + '/../backends/**/bin/' + driver, recursive=True): + if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK): + return driver_path + + # CASE 3: {driver} is found in nowhere + return None + + +def _get_parser(backends_list): + infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER | -b BACKEND] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]' + parser = argparse.ArgumentParser( + description='command line tool to infer model', usage=infer_usage) + + _utils._add_default_arg(parser) + + # TODO: add tflite/onnx-infer driver to helper message when it is implemented + driver_help_message = 'backend inference driver name to execute' + parser.add_argument('-d', '--driver', type=str, help=driver_help_message) + + # get backend list in the directory + backends_name = [ntpath.basename(f) for f in backends_list] + if not backends_name: + backends_name_message = '(There is no available backend drivers)' + else: + backends_name_message = '(available backend drivers: ' + ', '.join( + backends_name) + ')' + backend_help_message = 'backend name to use ' + backends_name_message + parser.add_argument('-b', '--backend', type=str, help=backend_help_message) + + post_process_help_message = 'post processing script to convert I/O data to standard format' + parser.add_argument('--post-process', type=str, help=post_process_help_message) + + return parser + + +def _verify_arg(parser, args): + """verify given arguments""" + # `-d/--driver` and `-b/--backend` are mutually exclusive arguments. + if _utils._is_valid_attr(args, 'driver') and _utils._is_valid_attr(args, 'backend'): + parser.error( + '-d and -b options are mutually exclusive. 
Please use only one of them') + + missing = [] + if not _utils._is_valid_attr(args, 'driver') and not _utils._is_valid_attr( + args, 'backend'): + missing.append('{-d/--driver | -b/--backend}') + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + + +def _parse_arg(parser): + infer_args = [] + backend_args = [] + argv = copy.deepcopy(sys.argv) + # delete file name + del argv[0] + # split by '--' + args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x] + + # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [-b BACKEND] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER] + if len(args): + infer_args = args[0] + infer_args = parser.parse_args(infer_args) + backend_args = backend_args if len(args) < 2 else args[1] + # print version + if len(args) and infer_args.version: + _utils._print_version_and_exit(__file__) + + return infer_args, backend_args + + +def _get_executable(args, backends_list): + driver = _utils._is_valid_attr(args, 'driver') + if driver: + executable = _search_backend_driver(driver) + if executable: + return executable + else: + raise FileNotFoundError(driver + ' not found') + + if _utils._is_valid_attr(args, 'backend'): + backend_base = getattr(args, 'backend') + '-infer' + for cand in backends_list: + if ntpath.basename(cand) == backend_base: + return cand + raise FileNotFoundError(backend_base + ' not found') + + +def main(): + # get backend list + backends_list = _get_backends_list() + + # parse arguments + parser = _get_parser(backends_list) + args, backend_args = _parse_arg(parser) + + # parse configuration file + _utils._parse_cfg(args, 'one-infer') + + # verify arguments + _verify_arg(parser, args) + + # make a command to run given backend driver + driver_path = _get_executable(args, backends_list) + infer_cmd = [driver_path] + backend_args + if _utils._is_valid_attr(args, 'command'): + infer_cmd += getattr(args, 'command').split() + + # run backend driver + 
_utils._run(infer_cmd, err_prefix=ntpath.basename(driver_path)) + + # run post process script if it's given + if _utils._is_valid_attr(args, 'post_process'): + # NOTE: the given python script will be executed by venv of ONE + python_path = sys.executable + post_process_command = [python_path] + getattr(args, + 'post_process').strip().split(' ') + _utils._run(post_process_command, err_prefix='one-infer') + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-init b/compiler/one-cmds/one-init new file mode 100644 index 000000000..04c4534cd --- /dev/null +++ b/compiler/one-cmds/one-init @@ -0,0 +1,280 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # ''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import copy +import glob +import itertools +import ntpath +import os +import sys + +import configparser +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +class CommentableConfigParser(configparser.ConfigParser): + """ + ConfigParser where comment can be stored + In Python ConfigParser, comment in ini file ( starting with ';') is considered a key of which + value is None. + Ref: https://stackoverflow.com/questions/6620637/writing-comments-to-files-with-configparser + """ + + def __init__(self): + # allow_no_value=True to add comment + # ref: https://stackoverflow.com/a/19432072 + configparser.ConfigParser.__init__(self, allow_no_value=True) + self.optionxform = str + + def add_comment(self, section, comment): + comment_sign = ';' + self[section][f'{comment_sign} {comment}'] = None + + +def _get_backends_list(): + """ + [one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test + + The list where `one-init` finds its backends + - `bin` folder where `one-init` exists + - `backends` folder + + NOTE If there are backends of the same name in different places, + the closer to the top in the list, the higher the priority. 
+ """ + dir_path = os.path.dirname(os.path.realpath(__file__)) + backend_set = set() + + # bin folder + files = [f for f in glob.glob(dir_path + '/*-init')] + # backends folder + files += [f for f in glob.glob(dir_path + '/../backends/**/*-init', recursive=True)] + # TODO find backends in `$PATH` + + backends_list = [] + for cand in files: + base = ntpath.basename(cand) + if (not base in backend_set) and os.path.isfile(cand) and os.access( + cand, os.X_OK): + backend_set.add(base) + backends_list.append(cand) + + return backends_list + + +# TODO Add support for TF graphdef and bcq +def _get_parser(backends_list): + init_usage = ( + 'one-init [-h] [-v] [-V] ' + '[-i INPUT_PATH] ' + '[-o OUTPUT_PATH] ' + '[-m MODEL_TYPE] ' + '[-b BACKEND] ' + # args for onnx model + '[--convert_nchw_to_nhwc] ' + '[--nchw_to_nhwc_input_shape] ' + '[--nchw_to_nhwc_output_shape] ' + # args for backend driver + '[--] [COMMANDS FOR BACKEND DRIVER]') + """ + NOTE + layout options for onnx model could be difficult to users. + In one-init, we could consider easier args for the the above three: + For example, we could have another option, e.g., --input_img_layout LAYOUT + - When LAYOUT is NHWC, apply 'nchw_to_nhwc_input_shape=True' into cfg + - When LAYOUT is NCHW, apply 'nchw_to_nhwc_input_shape=False' into cfg + """ + + parser = argparse.ArgumentParser( + description='Command line tool to generate initial cfg file. ' + 'Currently tflite and onnx models are supported', + usage=init_usage) + + _utils._add_default_arg_no_CS(parser) + + parser.add_argument( + '-i', '--input_path', type=str, help='full filepath of the input model file') + parser.add_argument( + '-o', '--output_path', type=str, help='full filepath of the output cfg file') + parser.add_argument( + '-m', + '--model_type', + type=str, + help=('type of input model: "onnx", "tflite". 
' + 'If the file extension passed to --input_path is ' + '".tflite" or ".onnx", this arg can be omitted.')) + + onnx_group = parser.add_argument_group('arguments when model type is onnx') + onnx_group.add_argument( + '--convert_nchw_to_nhwc', + action='store_true', + help= + 'Convert NCHW operators to NHWC under the assumption that input model is NCHW.') + onnx_group.add_argument( + '--nchw_to_nhwc_input_shape', + action='store_true', + help='Convert the input shape of the model (argument for convert_nchw_to_nhwc)') + onnx_group.add_argument( + '--nchw_to_nhwc_output_shape', + action='store_true', + help='Convert the output shape of the model (argument for convert_nchw_to_nhwc)') + + # get backend list in the directory + backends_name = [ntpath.basename(f) for f in backends_list] + if not backends_name: + backends_name_message = '(There is no available backend drivers)' + else: + backends_name_message = '(available backend drivers: ' + ', '.join( + backends_name) + ')' + backend_help_message = 'backend name to use ' + backends_name_message + parser.add_argument('-b', '--backend', type=str, help=backend_help_message) + + return parser + + +def _verify_arg(parser, args): + # check if required arguments is given + missing = [] + if not _utils._is_valid_attr(args, 'input_path'): + missing.append('-i/--input_path') + if not _utils._is_valid_attr(args, 'output_path'): + missing.append('-o/--output_path') + if not _utils._is_valid_attr(args, 'backend'): + missing.append('-b/--backend') + + if _utils._is_valid_attr(args, 'model_type'): + # TODO Support model types other than onnx and tflite (e.g., TF) + if getattr(args, 'model_type') not in ['onnx', 'tflite']: + parser.error('Allowed value for --model_type: "onnx" or "tflite"') + + if _utils._is_valid_attr(args, 'nchw_to_nhwc_input_shape'): + if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'): + missing.append('--convert_nchw_to_nhwc') + if _utils._is_valid_attr(args, 'nchw_to_nhwc_output_shape'): + if not 
_utils._is_valid_attr(args, 'convert_nchw_to_nhwc'): + missing.append('--convert_nchw_to_nhwc') + + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + + +def _parse_arg(parser): + init_args = [] + backend_args = [] + argv = copy.deepcopy(sys.argv) + # delete file name + del argv[0] + # split by '--' + args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x] + + # one-init [-h] [-v] ... + if len(args): + init_args = args[0] + init_args = parser.parse_args(init_args) + backend_args = backend_args if len(args) < 2 else args[1] + # print version + if len(args) and init_args.version: + _utils._print_version_and_exit(__file__) + + return init_args, backend_args + + +def _get_executable(args, backends_list): + if _utils._is_valid_attr(args, 'backend'): + backend_base = getattr(args, 'backend') + '-init' + for cand in backends_list: + if ntpath.basename(cand) == backend_base: + return cand + raise FileNotFoundError(backend_base + ' not found') + + +# TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354) +def _generate(): + # generate cfg file + config = CommentableConfigParser() + + def _add_onecc_sections(): + pass # NYI + + def _gen_import(): + pass # NYI + + def _gen_optimize(): + pass # NYI + + def _gen_quantize(): + pass # NYI + + def _gen_codegen(): + pass # NYI + + # + # NYI: one-profile, one-partition, one-pack, one-infer + # + + _add_onecc_sections() + + _gen_import() + _gen_optimize() + _gen_quantize() + _gen_codegen() + + with open(args.output_path, 'w') as f: + config.write(f) + + +def main(): + # get backend list + backends_list = _get_backends_list() + + # parse arguments + parser = _get_parser(backends_list) + args, backend_args = _parse_arg(parser) + + # verify arguments + _verify_arg(parser, args) + + # make a command to run given backend driver + driver_path = _get_executable(args, backends_list) + init_cmd = [driver_path] + backend_args + + # run backend driver + 
_utils._run(init_cmd, err_prefix=ntpath.basename(driver_path)) + + #TODO generate cfg file + + raise NotImplementedError("NYI") + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize index 8b1f3f7be..481fc8459 100644 --- a/compiler/one-cmds/one-optimize +++ b/compiler/one-cmds/one-optimize @@ -21,7 +21,6 @@ import argparse import os -import subprocess import sys import onelib.constant as _constant @@ -83,6 +82,14 @@ def _verify_arg(parser, args): if len(missing): parser.error('the following arguments are required: ' + ' '.join(missing)) + # default has pre-defined optimization options + default = _get_parser().parse_args() + + # check if unrecognized arguments are given + diff = set(dir(args)) - set(dir(default)) + if len(diff): + parser.error('the following arguments are unrecognized: ' + ' '.join(diff)) + def _parse_arg(parser): args = parser.parse_args() @@ -102,8 +109,8 @@ def _optimize(args): # make a command to optimize circle model circle2circle_path = os.path.join(dir_path, 'circle2circle') circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path, - getattr(args, 'input_path'), - getattr(args, 'output_path')) + getattr(args, 'input_path'), + getattr(args, 'output_path')) # verbose if _utils._is_valid_attr(args, 'verbose'): diff --git a/compiler/one-cmds/one-pack b/compiler/one-cmds/one-pack index 133207de0..5cab7c737 100644 --- a/compiler/one-cmds/one-pack +++ b/compiler/one-cmds/one-pack @@ -21,9 +21,7 @@ import argparse import os -import subprocess import sys -import tempfile import utils as _utils diff --git a/compiler/one-cmds/one-partition b/compiler/one-cmds/one-partition new file mode 100644 index 000000000..c0d71e5d9 --- /dev/null +++ b/compiler/one-cmds/one-partition @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # ''' +''''export 
PY_PATH=${SCRIPT_PATH}/venv/bin/python # ''' +''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # ''' +''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # ''' +''''exit 255 # ''' + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import configparser +import os +import sys + +import utils as _utils + +# TODO Find better way to suppress trackback on error +sys.tracebacklimit = 0 + + +def _get_parser(): + parser = argparse.ArgumentParser( + description='command line tool to partition circle model by multiple backends') + + _utils._add_default_arg(parser) + + parser.add_argument( + '--backends', type=str, help='backends in CSV to use for partitioning') + parser.add_argument('--default', type=str, help='default backend to assign') + + parser.add_argument( + '--part_file', type=str, help='partition file which provides backend to assign') + parser.add_argument('--input_file', type=str, help='input circle model filename') + parser.add_argument( + '--work_path', + type=str, + help='work path of partition, input files exist and output files are produced') + + return parser + + +def _parse_arg(parser): + args = parser.parse_args() + # print version + if args.version: + _utils._print_version_and_exit(__file__) + + return args + + +def _verify_arg(parser, args): + """verify given arguments""" + # check if required arguments is given + missing = [] + if 
not _utils._is_valid_attr(args, 'part_file'): + missing.append('part_file') + if not _utils._is_valid_attr(args, 'input_file'): + missing.append('input_file') + if len(missing): + parser.error('the following arguments are required: ' + ' '.join(missing)) + return + + +def _partition(args): + # get file path to log + bin_path = os.path.dirname(os.path.realpath(__file__)) + cur_path = os.getcwd() + partition_path = os.path.join(cur_path, args.part_file) + logfile_path = partition_path + '.log' + + with open(logfile_path, 'wb', buffering=0) as f: + # make a command to package circle model and metadata into nnpackage + circle_partitioner_path = os.path.join(bin_path, 'circle-partitioner') + + cmd = [os.path.expanduser(circle_partitioner_path)] + + if _utils._is_valid_attr(args, 'backends'): + cmd.append('--backends') + cmd.append(getattr(args, 'backends')) + if _utils._is_valid_attr(args, 'default'): + cmd.append('--default') + cmd.append(getattr(args, 'default')) + if _utils._is_valid_attr(args, 'work_path'): + cmd.append('--work_path') + cmd.append(getattr(args, 'work_path')) + + cmd.append('--part_file') + cmd.append(args.part_file) + cmd.append('--input_file') + cmd.append(args.input_file) + + f.write((' '.join(cmd) + '\n').encode()) + + # run circle-partitoner + _utils._run(cmd, err_prefix='circle-partitioner', logfile=f) + + +def main(): + # parse arguments + parser = _get_parser() + args = _parse_arg(parser) + + # parse configuration file + _utils._parse_cfg(args, 'one-partition') + + if _utils._is_valid_attr(args, 'config'): + config_path = getattr(args, 'config') + _utils._parse_cfg_and_overwrite(config_path, 'one-partition', args) + + # verify arguments + _verify_arg(parser, args) + + # do partition + _partition(args) + + +if __name__ == '__main__': + _utils._safemain(main, __file__) diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv index 0f75166a7..b435671f4 100644 --- a/compiler/one-cmds/one-prepare-venv +++ 
b/compiler/one-cmds/one-prepare-venv @@ -41,6 +41,7 @@ VER_ONNX_TF=1.10.0 # Install tensorflow PIP_TRUSTED_HOST="--trusted-host pypi.org " +PIP_TRUSTED_HOST+="--trusted-host pypi.python.org " PIP_TRUSTED_HOST+="--trusted-host files.pythonhost.org " PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org " @@ -62,7 +63,8 @@ else ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW} fi ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability +# TODO remove version fix, https://github.com/Samsung/ONE/issues/9240 +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.16.0 # Install PyTorch and ONNX related # NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL. @@ -72,6 +74,8 @@ TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html" if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}" fi +# TODO remove torch message +echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'" ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL} ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} @@ -84,3 +88,7 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then else ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF} fi + +# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051 +# TODO remove this when issue is resolved +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.20.1 diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile index ed6d8bd7a..b19c215ed 100644 --- a/compiler/one-cmds/one-profile +++ b/compiler/one-cmds/one-profile @@ -25,9 +25,7 @@ import glob import itertools import ntpath import os -import subprocess import sys -import tempfile import utils as _utils diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize index f2eff24bd..9282007d8 100644 --- 
a/compiler/one-cmds/one-quantize +++ b/compiler/one-cmds/one-quantize @@ -21,11 +21,12 @@ import argparse import os -import subprocess import sys import tempfile +import json import utils as _utils +from utils import Command # TODO Find better way to suppress trackback on error sys.tracebacklimit = 0 @@ -67,6 +68,12 @@ def _get_parser(): action='store_true', help='generate profiling data') + # save intermediate file(s) + parser.add_argument( + '--save_intermediate', + action='store_true', + help='Save intermediate files to output folder') + ## arguments for quantization quantization_group = parser.add_argument_group('arguments for quantization') @@ -93,13 +100,13 @@ def _get_parser(): '--input_type', type=str, help= - 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.' + 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.' ) quantization_group.add_argument( '--output_type', type=str, help= - 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.' + 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.' ) quantization_group.add_argument( '--min_percentile', @@ -126,10 +133,50 @@ def _get_parser(): "Force MaxPool Op to have the same input/output quantparams. 
NOTE: This option can degrade accuracy of some models.)" ) quantization_group.add_argument( - '--quant_config', - type=str, + '--quant_config', type=str, help="Path to the quantization configuration file.") + quantization_group.add_argument( + '--evaluate_result', + action='store_true', + help= + "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results." + ) + quantization_group.add_argument( + '--test_data', type=str, help="Path to the test data used for evaluation.") + quantization_group.add_argument( + '--print_mae', + action='store_true', + help= + "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_mape', + action='store_true', + help= + "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_mpeir', + action='store_true', + help= + "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_top1_match', + action='store_true', + help= + "Print Top-1 match ratio of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_top5_match', + action='store_true', + help= + "Print Top-5 match ratio of inference results between quantized model and fp32 model." + ) + quantization_group.add_argument( + '--print_mse', + action='store_true', help= - "Path to the quantization configuration file." + "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model." 
) # arguments for force_quantparam option @@ -162,6 +209,14 @@ def _get_parser(): copy_quantparam_group.add_argument( '--dst_tensor_name', type=str, action='append', help='tensor name (string)') + # arguments for fake_quant option + fake_quant_group = parser.add_argument_group('arguments for fake_quantize option') + + fake_quant_group.add_argument( + '--fake_quantize', + action='store_true', + help='convert quantized model to fake-quantized fp32 model.') + return parser @@ -171,8 +226,29 @@ def _set_default_values(args): setattr(args, 'input_model_dtype', 'float32') if not _utils._is_valid_attr(args, 'quantized_dtype'): setattr(args, 'quantized_dtype', 'uint8') + if _utils._is_valid_attr(args, 'quant_config'): + # Get quantized_dtype from qconfig file + try: + with open(getattr(args, 'quant_config')) as f: + qconf = json.load(f) + if 'default_quantization_dtype' in qconf: + setattr(args, 'quantized_dtype', + qconf['default_quantization_dtype']) + except json.decoder.JSONDecodeError: + print('Failed to decode ' + getattr(args, 'quant_config') + + '. Please check it is a json file.') if not _utils._is_valid_attr(args, 'granularity'): setattr(args, 'granularity', 'layer') + if _utils._is_valid_attr(args, 'quant_config'): + # Get granularity from qconfig file + try: + with open(getattr(args, 'quant_config')) as f: + qconf = json.load(f) + if 'default_granularity' in qconf: + setattr(args, 'granularity', qconf['default_granularity']) + except json.decoder.JSONDecodeError: + print('Failed to decode ' + getattr(args, 'quant_config') + + '. 
Please check it is a json file.') if not _utils._is_valid_attr(args, 'mode'): setattr(args, 'mode', 'percentile') if not _utils._is_valid_attr(args, 'min_percentile'): @@ -238,11 +314,18 @@ def _quantize(args): _copy_qparam(args) return + if _utils._is_valid_attr(args, 'fake_quantize'): + # fake-quantize model + _fake_quantize(args) + return + # get file path to log dir_path = os.path.dirname(os.path.realpath(__file__)) logfile_path = os.path.realpath(args.output_path) + '.log' with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir: + if _utils._is_valid_attr(args, 'save_intermediate'): + tmpdir = os.path.dirname(logfile_path) # get driver path circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer') record_minmax_path = os.path.join(dir_path, 'record-minmax') @@ -263,13 +346,19 @@ def _quantize(args): circle_quantizer_cmd.append(getattr(args, 'quantized_dtype')) if _utils._is_valid_attr(args, 'granularity'): circle_quantizer_cmd.append(getattr(args, 'granularity')) + if _utils._is_valid_attr(args, 'quant_config'): + # NOTE --config conflicts with --config option in onecc, so + # we use quant_config for one-quantize + circle_quantizer_cmd.append('--config') + circle_quantizer_cmd.append(getattr(args, 'quant_config')) # input and output path if _utils._is_valid_attr(args, 'input_path'): circle_quantizer_cmd.append(getattr(args, 'input_path')) - tmp_output_path_1 = os.path.join( + tmp_weights_fake_quant_path = os.path.join( tmpdir, - os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle' - circle_quantizer_cmd.append(tmp_output_path_1) + os.path.splitext(os.path.basename( + args.input_path))[0]) + '.weights_fake_quant.circle' + circle_quantizer_cmd.append(tmp_weights_fake_quant_path) # profiling if _utils._is_valid_attr(args, 'generate_profile_data'): circle_quantizer_cmd.append('--generate_profile_data') @@ -279,45 +368,23 @@ def _quantize(args): # run circle-quantizer _utils._run(circle_quantizer_cmd, 
err_prefix="circle_quantizer", logfile=f) - ## make a command to record min-max value of each tensor while running the representative dataset - circle_record_minmax_cmd = [record_minmax_path] - # verbose - if _utils._is_valid_attr(args, 'verbose'): - circle_record_minmax_cmd.append('--verbose') - # input and output path - circle_record_minmax_cmd.append('--input_model') - circle_record_minmax_cmd.append(tmp_output_path_1) - tmp_output_path_2 = os.path.join( + tmp_minmax_recorded_path = os.path.join( tmpdir, - os.path.splitext(os.path.basename(args.input_path))[0]) + '2.circle' - circle_record_minmax_cmd.append('--output_model') - circle_record_minmax_cmd.append(tmp_output_path_2) - # input data - if _utils._is_valid_attr(args, 'input_data'): - circle_record_minmax_cmd.append('--input_data') - circle_record_minmax_cmd.append(getattr(args, 'input_data')) - if _utils._is_valid_attr(args, 'input_data_format'): - circle_record_minmax_cmd.append('--input_data_format') - circle_record_minmax_cmd.append(getattr(args, 'input_data_format')) - # min and max percentile - if _utils._is_valid_attr(args, 'min_percentile'): - circle_record_minmax_cmd.append('--min_percentile') - circle_record_minmax_cmd.append(getattr(args, 'min_percentile')) - if _utils._is_valid_attr(args, 'max_percentile'): - circle_record_minmax_cmd.append('--max_percentile') - circle_record_minmax_cmd.append(getattr(args, 'max_percentile')) - # mode - if _utils._is_valid_attr(args, 'mode'): - circle_record_minmax_cmd.append('--mode') - circle_record_minmax_cmd.append(getattr(args, 'mode')) - # profiling - if _utils._is_valid_attr(args, 'generate_profile_data'): - circle_record_minmax_cmd.append('--generate_profile_data') - - f.write((' '.join(circle_record_minmax_cmd) + '\n').encode()) + os.path.splitext(os.path.basename( + args.input_path))[0]) + '.minmax_recorded.circle' - # run record-minmax - _utils._run(circle_record_minmax_cmd, err_prefix="record_minmax", logfile=f) + ## make a command to record min-max 
value of each tensor while running the representative dataset + record_minmax_cmd = Command(record_minmax_path, args, f) + record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \ + .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \ + .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \ + .add_option_with_valid_args('--input_data', ['input_data']) \ + .add_option_with_valid_args('--input_data_format', ['input_data_format']) \ + .add_option_with_valid_args('--min_percentile', ['min_percentile']) \ + .add_option_with_valid_args('--max_percentile', ['max_percentile']) \ + .add_option_with_valid_args('--mode', ['mode']) \ + .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \ + .run() ## make a second command to quantize the model using the embedded information circle_quantizer_cmd = [circle_quantizer_path] @@ -349,7 +416,7 @@ def _quantize(args): circle_quantizer_cmd.append('--config') circle_quantizer_cmd.append(getattr(args, 'quant_config')) # input and output path - circle_quantizer_cmd.append(tmp_output_path_2) + circle_quantizer_cmd.append(tmp_minmax_recorded_path) if _utils._is_valid_attr(args, 'output_path'): circle_quantizer_cmd.append(getattr(args, 'output_path')) # profiling @@ -361,6 +428,38 @@ def _quantize(args): # run circle-quantizer _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) + # evaluate + if _utils._is_valid_attr(args, 'evaluate_result'): + circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff') + quant_model = "" + if _utils._is_valid_attr(args, 'output_path'): + quant_model = getattr(args, 'output_path') + tmp_fake_quant_model = os.path.join( + tmpdir, + os.path.splitext(os.path.basename( + args.input_path))[0]) + '.fake_quant.circle' + + # do fake quantization + fake_quantize_cmd = Command(circle_quantizer_path, args, f) + fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \ + 
.add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \ + .run() + + # compare fake-quant model and fp32 model + circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f) + circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \ + .add_option_with_values('--second_model', [tmp_fake_quant_model]) \ + .add_option_with_valid_args('--first_input_data', ['test_data']) \ + .add_option_with_valid_args('--second_input_data', ['test_data']) \ + .add_option_with_valid_args('--input_data_format', ['input_data_format']) \ + .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \ + .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \ + .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \ + .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \ + .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \ + .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \ + .run() + def _write_qparam(args): # get file path to log @@ -433,6 +532,24 @@ def _copy_qparam(args): _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) +def _fake_quantize(args): + # get file path to log + dir_path = os.path.dirname(os.path.realpath(__file__)) + logfile_path = os.path.realpath(args.output_path) + '.log' + + with open(logfile_path, 'wb') as f: + # get driver path + circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer') + q_model = getattr(args, 'input_path') + fq_model = getattr(args, 'output_path') + + # do fake quantization + fake_quantize_cmd = Command(circle_quantizer_path, args, f) + fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \ + .add_option_with_values('--fake_quantize', [q_model, fq_model]) \ + .run() + + def main(): # parse arguments parser = _get_parser() diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc index 25682ff4b..a5ba636a2 100644 --- a/compiler/one-cmds/onecc +++ 
b/compiler/one-cmds/onecc @@ -25,6 +25,8 @@ import os import subprocess import sys +from onelib.CfgRunner import CfgRunner +from onelib.WorkflowRunner import WorkflowRunner import utils as _utils # TODO Find better way to suppress trackback on error @@ -42,6 +44,7 @@ subtool_list = { 'backend': { 'codegen': 'Code generation tool', 'profile': 'Profile backend model file', + 'infer': 'Infer backend model file' }, } @@ -64,12 +67,25 @@ def _check_subtool_exists(): def _get_parser(): - onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [COMMAND <args>]' + onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [-W WORKFLOW] [-O OPTIMIZATION] [COMMAND <args>]' onecc_desc = 'Run ONE driver via several commands or configuration file' parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage) _utils._add_default_arg(parser) + opt_name_list = _utils._get_optimization_list(get_name=True) + opt_name_list = ['-' + s for s in opt_name_list] + if not opt_name_list: + opt_help_message = '(No available optimization options)' + else: + opt_help_message = '(Available optimization options: ' + ', '.join( + opt_name_list) + ')' + opt_help_message = 'optimization name to use ' + opt_help_message + parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message) + + parser.add_argument( + '-W', '--workflow', type=str, metavar='WORKFLOW', help='run with workflow file') + # just for help message compile_group = parser.add_argument_group('compile to circle model') for tool, desc in subtool_list['compile'].items(): @@ -98,45 +114,17 @@ def _parse_arg(parser): def _verify_arg(parser, args): """verify given arguments""" # check if required arguments is given - if not _utils._is_valid_attr(args, 'config'): - parser.error('-C/--config argument is required') - - -def _get_driver_name(driver_name): - return { - 'one-optimize': 'one-optimize', - 'one-quantize': 'one-quantize', - 'one-pack': 'one-pack', - 'one-codegen': 'one-codegen', - 'one-profile': 'one-profile' - }[driver_name] - 
- -def _parse_cfg(args): - config = configparser.ConfigParser() - config.optionxform = str - parsed = config.read(os.path.expanduser(getattr(args, 'config'))) - if not parsed: - raise FileNotFoundError('Not found given configuration file') - return config - - -def _is_available_driver(config, driver_name): - return config.has_option('onecc', driver_name) and config.getboolean( - 'onecc', driver_name) - - -def _verify_cfg(import_driver_list, config): - if not config.has_section('onecc'): - raise ImportError('[onecc] section is required in configuration file') - - import_driver_cnt = 0 - for d in import_driver_list: - if _is_available_driver(config, d): - import_driver_cnt += 1 - - if import_driver_cnt > 1: - raise AssertionError('Only one import-* driver can be executed') + if not _utils._is_valid_attr(args, 'config') and not _utils._is_valid_attr( + args, 'workflow'): + parser.error('-C/--config or -W/--workflow argument is required') + # check if given optimization option exists + opt_name_list = _utils._get_optimization_list(get_name=True) + opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list] + if _utils._is_valid_attr(args, 'O'): + if ' ' in getattr(args, 'O'): + parser.error('Not allowed to have space in the optimization name') + if not getattr(args, 'O') in opt_name_list: + parser.error('Invalid optimization option') def main(): @@ -158,35 +146,16 @@ def main(): # verify arguments _verify_arg(parser, args) - # parse configuration file - config = _parse_cfg(args) - - # verify configuration file bin_dir = os.path.dirname(os.path.realpath(__file__)) - import_drivers_dict = _utils._detect_one_import_drivers(bin_dir) - transform_drivers = [ - 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile' - ] - _verify_cfg(import_drivers_dict, config) - - # get sections to run - section_to_run = [] - for d in list(import_drivers_dict) + transform_drivers: - if _is_available_driver(config, d): - section_to_run.append(d) - - # run - 
dir_path = os.path.dirname(os.path.realpath(__file__)) - for section in section_to_run: - if section in import_drivers_dict: - # we already has driver name in dict - driver_name = import_drivers_dict[section] - else: - driver_name = _get_driver_name(section) - options = ['--config', getattr(args, 'config'), '--section', section] - if _utils._is_valid_attr(args, 'verbose'): - options.append('--verbose') - _call_driver(driver_name, options) + if _utils._is_valid_attr(args, 'config'): + runner = CfgRunner(args.config) + runner.detect_import_drivers(bin_dir) + if _utils._is_valid_attr(args, 'O'): + runner.add_opt(getattr(args, 'O')) + runner.run(bin_dir) + elif _utils._is_valid_attr(args, 'workflow'): + runner = WorkflowRunner(args.workflow) + runner.run(bin_dir) if __name__ == '__main__': diff --git a/compiler/one-cmds/onecc.template.cfg b/compiler/one-cmds/onecc.template.cfg index a23d1cea9..6f6a4e266 100644 --- a/compiler/one-cmds/onecc.template.cfg +++ b/compiler/one-cmds/onecc.template.cfg @@ -1,28 +1,144 @@ +; To activate a step (or task), +; set True for the step in [onecc] section and fill options in the corresponding section [onecc] -one-import-tf=True +; neural network model to circle +one-import-tf=False one-import-tflite=False one-import-bcq=False one-import-onnx=False -one-optimize=True +; circle to circle with optimization +one-optimize=False +; circle to circle with quantization one-quantize=False -one-pack=True +; partition circle +one-partition=False +; package circle and metadata into nnpackage +one-pack=False +; generate code for backend one-codegen=False +; profile one-profile=False +; infer +one-infer=False [one-import-tf] -input_path=/path/to/inception_v3.pb -output_path=inception_v3.circle -input_arrays=input -input_shapes=1,299,299,3 -output_arrays=InceptionV3/Predictions/Reshape_1 -converter_version=v1 +# mandatory +; pb file +input_path= +; circle file +output_path= +# optional +; v1 or v2 +converter_version=v2 +; graph_def(default), 
saved_model or keras_model model_format=graph_def +# optional but mandatory for model_format=graph_def +; input tensor names of the input arrays, comma-separated +input_arrays= +; output tensor names of the input arrays, comma-separated +output_arrays= +; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3) +input_shapes= + +[one-import-tflite] +# mandatory +; tflite file +input_path= +; circle file +output_path= + +[one-import-bcq] +# mandatory +; bcq file +input_path= +; circle file +output_path= +# optional +; v1 or v2 +converter_version=v2 +; graph_def(default), saved_model or keras_model +model_format=graph_def +# optional but mandatory for model_format=graph_def +; input tensor names of the input arrays, comma-separated +input_arrays= +; output tensor names of the input arrays, comma-separated +output_arrays= +; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3) +input_shapes= + +[one-import-onnx] +# mandatory +; onnx file +input_path= +; circle file +output_path= +# optional +; True or False +unroll_rnn= +; True or False +unroll_lstm= [one-optimize] -input_path=inception_v3.circle -output_path=inception_v3.opt.circle -generate_profile_data=False +# mandatory +; circle file +input_path= +; circle file +output_path= +# //TODO: Add available options + +[one-quantize] +# mandatory +; circle file +input_path= +; circle file +output_path= +# optional arguments for quantization +; input data file (if not given, random data will be used for calibration) +input_data= +; h5/hdf5(default), list/filelist, or dir/directory +input_data_format= +; dtype of quantized model (uint8(default), int16) +quantized_dtype= +; granularity of quantization (layer(default), channel) +granularity= +; dtype of model's input (uint8, int16, float32). Same with quantized_dtype by default. +input_type= +; dtype of model's output (uint8, int16, float32). Same with quantized_dtype by default. 
+output_type= + +[one-partition] +# mandatory +; partition file which provides backend to assign +part_file= +; circle file +input_file= +# //TODO: Add available options [one-pack] -input_path=inception_v3.opt.circle -output_path=inception_v3_pack +# mandatory +; input path +input_path= +; output path +output_path= +# //TODO: Add available options + +[one-codegen] +# mandatory +; backend name +backend= +; commands for each backend +command= + +[one-profile] +# mandatory +; backend name +backend= +# //TODO: Add available options + +[one-infer] +# mandatory (mutually exclusive) +; backend name +backend= +; driver name +driver= +# //TODO: Add available options diff --git a/compiler/one-cmds/onelib/CfgRunner.py b/compiler/one-cmds/onelib/CfgRunner.py new file mode 100644 index 000000000..c66e5b4ba --- /dev/null +++ b/compiler/one-cmds/onelib/CfgRunner.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import configparser +import os +import warnings + +import utils as oneutils + + +def _simple_warning(message, category, filename, lineno, file=None, line=None): + return f'{category.__name__}: {message}\n' + + +class CfgRunner: + driver_sequence = [ + 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile', + 'one-partition', 'one-infer' + ] + + def __init__(self, path): + self.path = path + self.optparser = None + self.cfgparser = configparser.ConfigParser() + # make option names case sensitive + self.cfgparser.optionxform = str + parsed = self.cfgparser.read(os.path.expanduser(path)) + if not parsed: + raise FileNotFoundError('Not found given configuration file') + + self._verify_cfg(self.cfgparser) + # default import drivers + self.import_drivers = [ + 'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite' + ] + + def _verify_cfg(self, cfgparser): + if not cfgparser.has_section('onecc'): + if cfgparser.has_section('one-build'): + warnings.formatwarning = _simple_warning + warnings.warn( + "[one-build] section will be deprecated. 
Please use [onecc] section.") + else: + raise ImportError('[onecc] section is required in configuration file') + + def _is_available(self, driver): + # if there's no `onecc` section, it will find `one-build` section because of backward compatibility + return (self.cfgparser.has_option('onecc', driver) and self.cfgparser.getboolean( + 'onecc', driver)) or (self.cfgparser.has_option('one-build', driver) + and self.cfgparser.getboolean('one-build', driver)) + + def add_opt(self, opt): + self.optparser = configparser.ConfigParser() + # make option names case sensitive + self.optparser.optionxform = str + opt_book = dict( + zip(oneutils._get_optimization_list(get_name=True), + oneutils._get_optimization_list())) + parsed = self.optparser.read(opt_book['O' + opt]) + if not parsed: + raise FileNotFoundError('Not found given optimization configuration file') + if len(self.optparser.sections()) != 1 or self.optparser.sections( + )[0] != 'one-optimize': + raise AssertionError( + 'Optimization configuration file only allowed to have a \'one-optimize\' section' + ) + self.opt = opt + + def detect_import_drivers(self, dir): + self.import_drivers = list(oneutils._detect_one_import_drivers(dir).keys()) + + def run(self, working_dir, verbose=False): + section_to_run = [] + for d in self.import_drivers + self.driver_sequence: + if self._is_available(d): + section_to_run.append(d) + + for section in section_to_run: + options = ['--config', self.path, '--section', section] + if section == 'one-optimize' and self.optparser: + options += ['-O', self.opt] + if verbose: + options.append('--verbose') + driver_path = os.path.join(working_dir, section) + cmd = [driver_path] + options + oneutils._run(cmd) diff --git a/compiler/one-cmds/onelib/OptionBuilder.py b/compiler/one-cmds/onelib/OptionBuilder.py new file mode 100644 index 000000000..6a75783ad --- /dev/null +++ b/compiler/one-cmds/onelib/OptionBuilder.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung 
Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from onelib.constant import CONSTANT + + +class OptionBuilder: + def __init__(self, one_cmd_type): + self.type = one_cmd_type + + def _build_default(self, commands): + options = [] + for k, v in commands.items(): + options.extend(['--' + k, v]) + return options + + def _build_with_unknown_command(self, commands): + COMMAND_K = 'command' + options = [] + for k, v in commands.items(): + if k == COMMAND_K: + continue + options.extend(['--' + k, v]) + options.extend(['--']) + options.extend(commands[COMMAND_K].split()) + return options + + def _build_import(self, commands): + options = [] + arg_0 = ['save_intermediate'] + for k, v in commands.items(): + if k in arg_0 and v == "True": + options.extend(['--' + k]) + continue + options.extend(['--' + k, v]) + return options + + def _build_optimize(self, commands): + options = [] + arg_0 = ['generate_profile_data'] + arg_1 = ['input_path', 'output_path', 'change_outputs'] + for k, v in commands.items(): + if k in arg_1: + options.extend(['--' + k, v]) + continue + if k in arg_0 and v == 'True': + options.extend(['--' + k]) + continue + for opt in CONSTANT.OPTIMIZATION_OPTS: + if k == opt[0] and v == "True": + options.extend(['--' + k]) + break + return options + + def _build_quantize(self, commands): + options = [] + arg_0 = [ + 'generate_profile_data', 'save_intermediate', 'TF-style_maxpool', + 'evaluate_result', 'print_mae', 
'print_mape', 'print_mpeir', + 'print_top1_match', 'print_top5_match', 'force_quantparam', 'copy_quantparam' + ] + for k, v in commands.items(): + if k in arg_0 and v == "True": + options.extend(['--' + k]) + continue + options.extend(['--' + k, v]) + return options + + def build(self, commands): + cmd_book = dict.fromkeys( + ['one-import-bcq', 'one-import-tflite', 'one-pack', 'one-partition'], + self._build_default) + cmd_book['one-codegen'] = self._build_with_unknown_command + cmd_book['one-import-onnx'] = self._build_import + cmd_book['one-import-pytorch'] = self._build_import + cmd_book['one-import-tf'] = self._build_import + cmd_book['one-infer'] = self._build_with_unknown_command + cmd_book['one-optimize'] = self._build_optimize + cmd_book['one-profile'] = self._build_with_unknown_command + cmd_book['one-quantize'] = self._build_quantize + + return cmd_book[self.type](commands) diff --git a/compiler/one-cmds/onelib/TopologicalSortHelper.py b/compiler/one-cmds/onelib/TopologicalSortHelper.py new file mode 100644 index 000000000..d05adea8d --- /dev/null +++ b/compiler/one-cmds/onelib/TopologicalSortHelper.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections import defaultdict + + +class TopologicalSortHelper: + def __init__(self, vertices): + self.graph = defaultdict(list) + self.vertices = vertices + + def add_edge(self, u, v): + self.graph[u].append(v) + + def sort_util(self, v, visited, stack): + visited[v] = True + + for i in self.graph[v]: + if visited[i] == False: + self.sort_util(i, visited, stack) + + stack.insert(0, v) + + def sort(self): + visited = dict.fromkeys(self.vertices, False) + stack = [] + + for v in self.vertices: + if visited[v] == False: + self.sort_util(v, visited, stack) + + return stack diff --git a/compiler/one-cmds/onelib/WorkflowRunner.py b/compiler/one-cmds/onelib/WorkflowRunner.py new file mode 100644 index 000000000..0482dd9da --- /dev/null +++ b/compiler/one-cmds/onelib/WorkflowRunner.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import os + +from onelib.OptionBuilder import OptionBuilder +from onelib.TopologicalSortHelper import TopologicalSortHelper +from onelib.CfgRunner import CfgRunner +import utils as oneutils + + +class WorkflowRunner: + WORKFLOWS_K = 'workflows' + DEPENDENCIES_K = 'run-after' + CFG_REFERENCE_K = 'cfg-reference' + WORKFLOW_STEPS_K = 'steps' + ONE_CMD_TOOL_K = 'one-cmd' + COMMANDS_K = 'commands' + + def __init__(self, path): + try: + with open(path) as f: + self.json_contents = json.load(f) + except FileNotFoundError: + raise FileNotFoundError("Not found given workflow file") + except json.decoder.JSONDecodeError: + raise ImportError("Invalid workflow file") + + self._verify_workflow(self.json_contents) + + workflows = self.json_contents[self.WORKFLOWS_K] + self.adj = dict.fromkeys(workflows, []) + # decide the order according to the dependencies of each workflow. + helper = TopologicalSortHelper(workflows) + for workflow_k in workflows: + workflow = self.json_contents[workflow_k] + if self.DEPENDENCIES_K in workflow: + for previous_workflow in workflow[self.DEPENDENCIES_K]: + helper.add_edge(previous_workflow, workflow_k) + self.adj[previous_workflow].append(workflow_k) + self.workflow_sequence = helper.sort() + + self._check_cycle() + + def _check_cycle(self): + pos = dict() + index = 0 + workflow_num = len(self.workflow_sequence) + # number the order + for seq_idx in range(workflow_num): + pos[self.workflow_sequence[seq_idx]] = index + index += 1 + + for seq_idx in range(workflow_num): + first_wf = self.workflow_sequence[seq_idx] + for adj_wf in self.adj[first_wf]: + first_pos = 0 if first_wf not in pos else pos[first_wf] + second_pos = 0 if adj_wf not in pos else pos[adj_wf] + if (first_pos > second_pos): + raise RuntimeError("Workflows should not have a cycle") + + def _verify_workflow(self, json_contents): + # workflow file should have WORKFLOWS_K + if not self.WORKFLOWS_K in json_contents: + raise ValueError("Not found \"" + self.WORKFLOWS_K + + 
"\" key in workflow file") + + workflows = json_contents[self.WORKFLOWS_K] + # workflow file should have keys listed in WORKFLOWS_K + for workflow_k in workflows: + if not workflow_k in json_contents: + raise ValueError("Not found " + workflow_k + " key listed in \"" + + self.WORKFLOWS_K + "\"") + + # each workflow should have either WORKFLOW_STEPS_K or CFG_REFERENCE_K + for workflow_k in workflows: + if not self.WORKFLOW_STEPS_K in json_contents[workflow_k] and not self.CFG_REFERENCE_K in json_contents[workflow_k]: + raise ValueError("Each workflow should have either \"" + + self.WORKFLOW_STEPS_K + "\" or \"" + + self.CFG_REFERENCE_K + "\"") + for workflow_k in workflows: + if self.WORKFLOW_STEPS_K in json_contents[workflow_k] and self.CFG_REFERENCE_K in json_contents[workflow_k]: + raise ValueError("\"" + self.WORKFLOW_STEPS_K + "\" and \"" + + self.CFG_REFERENCE_K + "\" are exclusive key") + + # each step should have ONE_CMD_TOOL_K and COMMANDS_K + for workflow_k in workflows: + workflow = json_contents[workflow_k] + if self.WORKFLOW_STEPS_K in workflow: + step_keys = workflow[self.WORKFLOW_STEPS_K] + for step_k in step_keys: + step = workflow[step_k] + if not self.ONE_CMD_TOOL_K in step or not self.COMMANDS_K in step: + raise ValueError("Each step should have \"" + + self.ONE_CMD_TOOL_K + "\"" + " and \"" + + self.COMMANDS_K + "\"") + + def run(self, working_dir, verbose=False): + # run workflows in sequence + for workflow_k in self.workflow_sequence: + workflow = self.json_contents[workflow_k] + if self.WORKFLOW_STEPS_K in workflow: + steps = workflow[self.WORKFLOW_STEPS_K] + for step_k in steps: + step = workflow[step_k] + commands = step[self.COMMANDS_K] + driver_name = step[self.ONE_CMD_TOOL_K] + option_builder = OptionBuilder(driver_name) + options = option_builder.build(commands) + # get the absolute path of the caller + driver_path = os.path.join(working_dir, driver_name) + cmd = [driver_path] + options + oneutils._run(cmd) + elif self.CFG_REFERENCE_K in 
workflow: + cfg_path = workflow[self.CFG_REFERENCE_K]['path'] + runner = CfgRunner(cfg_path) + runner.run(working_dir, verbose) diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py index 7ddd7382d..7dd79b65d 100644 --- a/compiler/one-cmds/onelib/constant.py +++ b/compiler/one-cmds/onelib/constant.py @@ -14,11 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. + class CONSTANT: __slots__ = () # This prevents access via __dict__. OPTIMIZATION_OPTS = ( # (OPTION_NAME, HELP_MESSAGE) - ('O1', 'enable O1 optimization pass'), ('convert_nchw_to_nhwc', 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.' ), @@ -29,6 +29,7 @@ class CONSTANT: 'convert the output shape of the model (argument for convert_nchw_to_nhwc)'), ('fold_add_v2', 'fold AddV2 op with constant inputs'), ('fold_cast', 'fold Cast op with constant input'), + ('fold_densify', 'fold Densify op with sparse constant input'), ('fold_dequantize', 'fold Dequantize op'), ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'), ('fold_gather', 'fold Gather op'), @@ -62,12 +63,16 @@ class CONSTANT: ('remove_unnecessary_slice', 'remove unnecessary slice ops'), ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'), ('remove_unnecessary_split', 'remove unnecessary split ops'), + ('replace_non_const_fc_with_batch_matmul', + 'replace FullyConnected op with non-const weights to BatchMatMul op'), + ('replace_sub_with_add', 'replace Sub op with Add op'), ('resolve_customop_add', 'convert Custom(Add) op to Add op'), ('resolve_customop_batchmatmul', 'convert Custom(BatchMatmul) op to BatchMatmul op'), ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'), ('resolve_customop_max_pool_with_argmax', 'convert Custom(MaxPoolWithArgmax) to net of builtin operators'), + ('resolve_customop_splitv', 'convert Custom(SplitV) op to SplitV 
op'), ('shuffle_weight_to_16x1float32', 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.' ' Note that it only converts weights whose row is a multiple of 16'), diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py index d8380f28d..0015e8319 100644 --- a/compiler/one-cmds/onelib/make_cmd.py +++ b/compiler/one-cmds/onelib/make_cmd.py @@ -19,6 +19,7 @@ import sys import onelib.constant as _constant + def _is_valid_attr(args, attr): return hasattr(args, attr) and getattr(args, attr) @@ -64,6 +65,10 @@ def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path): cmd.append('--output_arrays') cmd.append(getattr(args, 'output_arrays')) + # experimental options + if _is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'): + cmd.append('--experimental_disable_batchmatmul_unfold') + return cmd diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py index 26c2b75b9..0141514b6 100755 --- a/compiler/one-cmds/onnx_legalizer.py +++ b/compiler/one-cmds/onnx_legalizer.py @@ -341,7 +341,8 @@ def _dtype_to_np(dtype): raise NotImplementedError('unsupported data type') -def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name): +def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, + activation_name): """Generate subgraph of one direction of unrolled RNN layer Args: @@ -395,7 +396,7 @@ def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activa def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation, - clip, direction, hidden_size, layout): + clip, direction, hidden_size, layout): """Generate Simple (forward or reverse) unrolled RNN Args: @@ -432,7 +433,7 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a else: initial_h = None state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip, - activation) + activation) 
y_direction_dim = layout + 1 y_h_direction_dim = layout state_layout_tensors = [] @@ -447,12 +448,11 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a transformer.make_node( 'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim]) Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations, - clip, hidden_size, layout): + clip, hidden_size, layout): """Generate Bidirectional unrolled RNN Args: @@ -503,10 +503,10 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim]) state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0], - initial_h[0], clip, activations[0]) + initial_h[0], clip, activations[0]) x.reverse() state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1], - initial_h[1], clip, activations[1]) + initial_h[1], clip, activations[1]) state_b_tensors.reverse() y_direction_dim = layout + 1 @@ -538,8 +538,7 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac axis=y_h_direction_dim) Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) def _legalize_RNN(transformer, tensor_infos, node): @@ -600,10 +599,10 @@ def _legalize_RNN(transformer, tensor_infos, node): if direction in ['forward', 'reverse']: _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0], - clip, direction, hidden_size, layout) + clip, direction, hidden_size, layout) elif direction == 'bidirectional': - _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, clip, - hidden_size, layout) + 
_transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, + clip, hidden_size, layout) else: raise RuntimeError('Unknown RNN type') @@ -611,7 +610,7 @@ def _legalize_RNN(transformer, tensor_infos, node): def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip, - act, dtype, hidden_size, batch_size): + act, dtype, hidden_size, batch_size): """Generate subgraph for one direction of unrolled LSTM layer Args: @@ -754,7 +753,7 @@ def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos, - activations, clip, direction, hidden_size, layout): + activations, clip, direction, hidden_size, layout): """Generate Simple (forward or reverse) unrolled LSTM Args: @@ -818,17 +817,15 @@ def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos, transformer.make_node( 'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim]) Y_c = outputs[2] - transformer.make_node( - 'Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim]) + transformer.make_node('Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim]) if direction == 'reverse': state_layout_tensors.reverse() Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) -def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations, - clip, hidden_size, layout): +def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, + activations, clip, hidden_size, layout): """Generate Bidirectional unrolled LSTM Args: @@ -929,12 +926,10 @@ def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, a Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim]) Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim]) Y_c = 
outputs[2] - transformer.make_node( - 'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim) + transformer.make_node('Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim) Y = outputs[0] - transformer.make_node( - 'Concat', state_layout_tensors, [Y], axis=seq_length_dim) + transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim) def _legalize_LSTM(transformer, tensor_infos, node): @@ -1001,10 +996,10 @@ def _legalize_LSTM(transformer, tensor_infos, node): if direction in ['forward', 'reverse']: _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations, - clip, direction, hidden_size, layout) + clip, direction, hidden_size, layout) elif direction == 'bidirectional': _transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations, - clip, hidden_size, layout) + clip, hidden_size, layout) else: raise RuntimeError('Unknown LSTM type') @@ -1052,10 +1047,12 @@ def legalize(model, options): if __name__ == '__main__': if len(sys.argv) < 3: - print('usage: ./legalize_onnx.py <path to input model> <path to output model>\n' - '\n' - ' In stand-alone utility mode this tool provides basic funtionality\n' - ' If you want to have more control over applied transformations, use this legalizer as a library') + print( + 'usage: ./legalize_onnx.py <path to input model> <path to output model>\n' + '\n' + ' In stand-alone utility mode this tool provides basic funtionality\n' + ' If you want to have more control over applied transformations, use this legalizer as a library' + ) exit(1) options = LegalizeOptions() options.unroll_lstm = True diff --git a/compiler/one-cmds/requires.cmake b/compiler/one-cmds/requires.cmake index b1aabdb97..c27920980 100644 --- a/compiler/one-cmds/requires.cmake +++ b/compiler/one-cmds/requires.cmake @@ -1,6 +1,7 @@ require("tf2tfliteV2") require("tflite2circle") require("circle2circle") +require("circle-eval-diff") require("circle-quantizer") require("record-minmax") require("vconone") diff --git 
a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt index caea756c2..17f55ec96 100644 --- a/compiler/one-cmds/tests/CMakeLists.txt +++ b/compiler/one-cmds/tests/CMakeLists.txt @@ -4,6 +4,8 @@ file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test") file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg") file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json") +file(GLOB PYSCRIPTS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.py") +file(GLOB WORKFLOWITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.workflow.json") # Create a script to run the tests at installation folder set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh") @@ -45,6 +47,16 @@ foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS}) install(FILES ${QCONFIGITEM} DESTINATION test) endforeach(QCONFIGITEM) +foreach(PYSCRIPT IN ITEMS ${PYSCRIPTS}) + get_filename_component(ITEM_PREFIX ${PYSCRIPT} NAME_WE) + install(FILES ${PYSCRIPT} DESTINATION test) +endforeach(PYSCRIPT) + +foreach(WORKFLOWITEM IN ITEMS ${WORKFLOWITEMS}) + get_filename_component(ITEM_PREFIX ${WORKFLOWITEM} NAME_WE) + install(FILES ${WORKFLOWITEM} DESTINATION test) +endforeach(WORKFLOWITEM) + file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n") file(APPEND "${DRIVER_SCRIPT}" diff --git a/compiler/one-cmds/tests/OONECC_024.cfg b/compiler/one-cmds/tests/OONECC_024.cfg new file mode 100644 index 000000000..a39aae071 --- /dev/null +++ b/compiler/one-cmds/tests/OONECC_024.cfg @@ -0,0 +1,2 @@ +[one-optimize] +make_batchnorm_gamma_positive=True diff --git a/compiler/one-cmds/tests/one-build_008.cfg b/compiler/one-cmds/tests/one-build_008.cfg index 615047c86..8c777f64f 100644 --- a/compiler/one-cmds/tests/one-build_008.cfg +++ b/compiler/one-cmds/tests/one-build_008.cfg @@ -15,7 +15,6 @@ output_path=test_onnx_model.circle [one-optimize] input_path=test_onnx_model.circle output_path=test_onnx_model.opt.circle -all=True remove_redundant_transpose=True [one-codegen] diff --git 
a/compiler/one-cmds/tests/one-build_009.cfg b/compiler/one-cmds/tests/one-build_009.cfg index 66bca250d..b5a35dd97 100644 --- a/compiler/one-cmds/tests/one-build_009.cfg +++ b/compiler/one-cmds/tests/one-build_009.cfg @@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle [one-optimize] input_path=onnx_conv2d_conv2d.circle output_path=onnx_conv2d_conv2d.opt.circle -all=True remove_redundant_transpose=True convert_nchw_to_nhwc=True diff --git a/compiler/one-cmds/tests/one-import-onnx_002.test b/compiler/one-cmds/tests/one-import-onnx_002.test new file mode 100644 index 000000000..a6a38eee5 --- /dev/null +++ b/compiler/one-cmds/tests/one-import-onnx_002.test @@ -0,0 +1,71 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# test for experimental_disable_batchmatmul_unfold option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./reshape_matmul.onnx" +outputfile="./reshape_matmul.circle" + +rm -rf ${outputfile} +rm -rf ${outputfile}.log + +# run test without option that should drop FULLY_CONNECTED +one-import-onnx \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1 + +if ! 
grep -q "FULLY_CONNECTED" "${outputfile}.log"; then + trap_err_onexit +fi + +rm -rf ${outputfile} +rm -rf ${outputfile}.log + +# run test with option that should drop BATCH_MATMUL +one-import-onnx \ +--experimental_disable_batchmatmul_unfold \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1 + +if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" +exit 0 diff --git a/compiler/one-cmds/tests/one-infer-test-post-process.py b/compiler/one-cmds/tests/one-infer-test-post-process.py new file mode 100644 index 000000000..0f0e0d701 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer-test-post-process.py @@ -0,0 +1,16 @@ +# This script gets one argument and print it + +import sys +from pathlib import Path + + +def main(): + if len(sys.argv) < 2: + filepath = Path(sys.argv[0]) + sys.exit("Usage: " + filepath.name + " [Word to print]") + word = sys.argv[1] + print(word) + + +if __name__ == '__main__': + main() diff --git a/compiler/one-cmds/tests/one-infer_001.test b/compiler/one-cmds/tests/one-infer_001.test new file mode 100644 index 000000000..e7b569522 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_001.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/help-infer + exit 255 +} + +trap trap_err_onexit ERR + +# copy help-infer to bin folder +cp help-infer ../bin/help-infer + +# run test +one-infer -b help -- -h > ${filename}.log + +rm -rf ../bin/help-infer + +if grep -q "HELP MESSAGE!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_002.test b/compiler/one-cmds/tests/one-infer_002.test new file mode 100644 index 000000000..22070de19 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_002.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! -s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -d dummy-infer -- ${inputfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" 
"${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_003.test b/compiler/one-cmds/tests/one-infer_003.test new file mode 100644 index 000000000..e2aa459a1 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_003.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! -s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -b dummy -- ${inputfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_004.test b/compiler/one-cmds/tests/one-infer_004.test new file mode 100644 index 000000000..a4cb76c55 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_004.test @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# print one-infer's help message + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -h > ${filename}.log + +if grep -q "command line tool to infer model" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_005.cfg b/compiler/one-cmds/tests/one-infer_005.cfg new file mode 100644 index 000000000..aca687801 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_005.cfg @@ -0,0 +1,3 @@ +[one-infer] +backend=dummy +command=sample.tvn diff --git a/compiler/one-cmds/tests/one-infer_005.test b/compiler/one-cmds/tests/one-infer_005.test new file mode 100644 index 000000000..a44dd0e25 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_005.test @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# one-infer with configuration input + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +configfile="one-infer_005.cfg" +inputfile="sample.tvn" + +if [[ ! -s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -C ${configfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_006.test b/compiler/one-cmds/tests/one-infer_006.test new file mode 100644 index 000000000..2612133a3 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_006.test @@ -0,0 +1,53 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# one-infer with post process script + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="sample.tvn" + +if [[ ! 
-s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -b dummy --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1 +return_code=$? + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + if [ "$return_code" -eq "0" ]; then + echo "${filename_ext} SUCCESS" + exit 0 + fi +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/one-infer_neg_001.test b/compiler/one-cmds/tests/one-infer_neg_001.test new file mode 100644 index 000000000..62e721128 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_001.test @@ -0,0 +1,39 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with no input + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "error: the following arguments are required: {-d/--driver | -b/--backend}" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_002.test b/compiler/one-cmds/tests/one-infer_neg_002.test new file mode 100644 index 000000000..fa88876e8 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_002.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# passed driver is not found + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +driver_name="neg-infer" + +trap_err_onexit() +{ + if grep -q "FileNotFoundError: ${driver_name} not found" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -d ${driver_name} -- -h> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_003.test b/compiler/one-cmds/tests/one-infer_neg_003.test new file mode 100644 index 000000000..a0005520f --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_003.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# passed backend is not found + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +backend_name="neg" + +trap_err_onexit() +{ + if grep -q "FileNotFoundError: ${backend_name}-infer not found" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +# run test +one-infer -b ${backend_name} -- -h> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-infer_neg_004.test b/compiler/one-cmds/tests/one-infer_neg_004.test new file mode 100644 index 000000000..b9130d051 --- /dev/null +++ b/compiler/one-cmds/tests/one-infer_neg_004.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# both -b and -d option drivers are given as argument + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +backend_name="neg" +driver_name="neg2" + +trap_err_onexit() +{ + if grep -q "\-d and -b options are mutually exclusive. 
Please use only one of them" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -d ${driver_name} -b ${backend_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_005.test b/compiler/one-cmds/tests/one-infer_neg_005.test
new file mode 100644
index 000000000..9074debcf
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_005.test
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with invalid post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  return_code=$?  # capture first: every later command overwrites $?
+  rm -rf ../bin/dummy-infer  # clean up on every exit path, including the SUCCESS one below
+  if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+    # Expected outcome: the driver output is in the log, yet one-infer exited non-zero afterwards
+    if [ "$return_code" -ne "0" ]; then
+      echo "${filename_ext} SUCCESS"
+      exit 0
+    fi
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ !
-s "${inputfile}" ]]; then + touch ${inputfile} +fi + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +one-infer -b dummy --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1 + +rm -rf ../bin/dummy-infer +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-optimize_001.test b/compiler/one-cmds/tests/one-optimize_001.test index 8eb58f4eb..4152fa3dd 100644 --- a/compiler/one-cmds/tests/one-optimize_001.test +++ b/compiler/one-cmds/tests/one-optimize_001.test @@ -40,7 +40,7 @@ if [[ ! -s ${inputfile} ]]; then fi # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path ${inputfile} \ --output_path ${outputfile} > /dev/null 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_002.test b/compiler/one-cmds/tests/one-optimize_002.test index bd64494be..58f792bf8 100644 --- a/compiler/one-cmds/tests/one-optimize_002.test +++ b/compiler/one-cmds/tests/one-optimize_002.test @@ -40,7 +40,7 @@ if [[ ! 
-s ${inputfile} ]]; then fi # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --change_outputs InceptionV3/Logits/SpatialSqueeze1 \ --input_path ${inputfile} \ --output_path ${outputfile} > /dev/null 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_neg_001.test b/compiler/one-cmds/tests/one-optimize_neg_001.test index f0b5563c7..c67e3d489 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_001.test +++ b/compiler/one-cmds/tests/one-optimize_neg_001.test @@ -39,7 +39,7 @@ rm -rf ${outputfile} rm -rf ${outputfile}.log # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_neg_002.test b/compiler/one-cmds/tests/one-optimize_neg_002.test index 72f306e20..a1ef70216 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_002.test +++ b/compiler/one-cmds/tests/one-optimize_neg_002.test @@ -39,7 +39,7 @@ rm -rf ${outputfile} rm -rf ${outputfile}.log # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-optimize_neg_003.test b/compiler/one-cmds/tests/one-optimize_neg_003.test index 3fe7d330e..668a6c29d 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_003.test +++ b/compiler/one-cmds/tests/one-optimize_neg_003.test @@ -44,7 +44,7 @@ if [[ ! 
-s ${inputfile} ]]; then fi # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --input_path "${inputfile}" > "${filename}.log" 2>&1 echo "${filename_ext} FAILED" diff --git a/compiler/one-cmds/tests/one-optimize_neg_004.test b/compiler/one-cmds/tests/one-optimize_neg_004.test index e73911b54..5abd4c553 100644 --- a/compiler/one-cmds/tests/one-optimize_neg_004.test +++ b/compiler/one-cmds/tests/one-optimize_neg_004.test @@ -39,7 +39,7 @@ rm -rf ${outputfile} rm -rf ${filename}.log # run test -one-optimize --O1 \ +one-optimize --resolve_customop_add \ --change_outputs non_existing_node_name \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-partition_001.test b/compiler/one-cmds/tests/one-partition_001.test new file mode 100644 index 000000000..a6fba07d7 --- /dev/null +++ b/compiler/one-cmds/tests/one-partition_001.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +testmodel="Net_InstanceNorm_003" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="${testmodel}.circle" +partfile="${testmodel}.part" +outputfile="${testmodel}.conn.json" + +rm -rf ${testmodel}.000* +rm -rf ${testmodel}.conn.* +rm -rf ${testmodel}.*.log + +# run test +one-partition \ +--input_file ${inputfile} \ +--part_file ${partfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-partition_neg_001.test b/compiler/one-cmds/tests/one-partition_neg_001.test new file mode 100644 index 000000000..d54a94fa2 --- /dev/null +++ b/compiler/one-cmds/tests/one-partition_neg_001.test @@ -0,0 +1,51 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with invalid .part file (wrong comply value) + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +testmodel="Net_InstanceNorm_003" + +trap_err_onexit() +{ + if grep -q "ERROR" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="${testmodel}.circle" +partfile="${testmodel}.neg.part" +outputfile="${testmodel}.conn.json" + +rm -rf ${testmodel}.000* +rm -rf ${testmodel}.conn.* +rm -rf ${testmodel}.*.log +rm -rf ${filename}.log + +# run test +one-partition \ +--input_file ${inputfile} \ +--part_file ${partfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-partition_neg_002.test b/compiler/one-cmds/tests/one-partition_neg_002.test new file mode 100644 index 000000000..23fe84c05 --- /dev/null +++ b/compiler/one-cmds/tests/one-partition_neg_002.test @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with invalid .cfg file (no one-partition section) + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" +testmodel="Net_InstanceNorm_003" + +trap_err_onexit() +{ + if grep -q "'one-partition' section" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +cfgfile="${testmodel}.neg.cfg" + +rm -rf ${testmodel}.000* +rm -rf ${testmodel}.conn.* +rm -rf ${testmodel}.*.log +rm -rf ${filename}.log + +# run test +one-partition -C ${cfgfile}> ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/one-quantize_010.test b/compiler/one-cmds/tests/one-quantize_010.test new file mode 100644 index 000000000..1095ba0a0 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_010.test @@ -0,0 +1,65 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_010.q.circle" +datafile="./inception_v3_test_data.h5" + +rm -rf ${outputfile} + +# to create inception_v3.circle +if [[ ! -s ${inputfile} ]]; then + /bin/bash one-import_001.test > /dev/null 2>&1 + return_code=$? + if [[ ${return_code} != 0 ]]; then + trap_err_onexit + fi +fi + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--input_path ${inputfile} \ +--input_data ${datafile} \ +--output_path ${outputfile} \ +--evaluate_result \ +--test_data ${datafile} \ +--print_mpeir > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/one-quantize_011.test b/compiler/one-cmds/tests/one-quantize_011.test new file mode 100644 index 000000000..34d7f57b5 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_011.test @@ -0,0 +1,56 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_011.q.circle" +datafile="./inception_v3_test_data.h5" + +rm -rf ${outputfile} + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--input_path ${inputfile} \ +--input_data ${datafile} \ +--output_path ${outputfile} \ +--evaluate_result \ +--test_data ${datafile} \ +--print_top5_match > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/one-quantize_012.qconf.json b/compiler/one-cmds/tests/one-quantize_012.qconf.json new file mode 100644 index 000000000..4a15b04f5 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_012.qconf.json @@ -0,0 +1,16 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D", + "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool", + "InceptionV3/InceptionV3/Mixed_5b/concat", + "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool", + "InceptionV3/InceptionV3/Mixed_7c/concat", + "InceptionV3/Predictions/Reshape_1"], + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/one-cmds/tests/one-quantize_012.test b/compiler/one-cmds/tests/one-quantize_012.test new file mode 100644 index 000000000..fba18acc5 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_012.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2022 
Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_012.q.circle" + +rm -rf ${outputfile} + +# run test without input data +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--quant_config one-quantize_012.qconf.json \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_013.qconf.json b/compiler/one-cmds/tests/one-quantize_013.qconf.json new file mode 100644 index 000000000..4a15b04f5 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_013.qconf.json @@ -0,0 +1,16 @@ +{ + "default_quantization_dtype" : "uint8", + "default_granularity" : "channel", + "layers" : [ + { + "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D", + "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool", + "InceptionV3/InceptionV3/Mixed_5b/concat", + "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool", + "InceptionV3/InceptionV3/Mixed_7c/concat", + "InceptionV3/Predictions/Reshape_1"], + "dtype" : "int16", + "granularity" : "channel" + } + ] +} diff --git a/compiler/one-cmds/tests/one-quantize_013.test b/compiler/one-cmds/tests/one-quantize_013.test new file mode 100644 index 000000000..fd443d627 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_013.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# quantized_dtype and granularity are given by qconfig file +# (not by command line interface) + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_013.q.circle" + +rm -rf ${outputfile} + +# run test without input data +# quantized_dtype and granularity are not given here +one-quantize \ +--input_dtype float32 \ +--quant_config one-quantize_013.qconf.json \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_014.test b/compiler/one-cmds/tests/one-quantize_014.test new file mode 100644 index 000000000..518c32841 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_014.test @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test if `circle-eval-diff` supports directory input. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.one-quantize_014.q.circle" +datadir="./raw_files/" + +rm -rf ${outputfile} + +# run test +one-quantize \ +--input_dtype float32 \ +--quantized_dtype uint8 \ +--granularity channel \ +--input_path ${inputfile} \ +--input_data ${datadir} \ +--input_data_format dir \ +--output_path ${outputfile} \ +--evaluate_result \ +--test_data ${datadir} \ +--print_top5_match > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/one-quantize_015.test b/compiler/one-cmds/tests/one-quantize_015.test new file mode 100644 index 000000000..bb45b5722 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_015.test @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Test if --fake_quantize option works well + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.mat.q8.circle" +outputfile="./inception_v3.one-quantize_015.fq.circle" + +rm -rf ${outputfile} + +# run test +one-quantize \ +--fake_quantize \ +--input_path ${inputfile} \ +--output_path ${outputfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test index ac920a4fe..e182edf78 100644 --- a/compiler/one-cmds/tests/one-quantize_neg_019.test +++ b/compiler/one-cmds/tests/one-quantize_neg_019.test @@ -42,7 +42,7 @@ one-quantize \ --input_dtype float32 \ --quantized_dtype int16 \ --granularity channel \ ---input_type float32 \ +--input_type float64 \ --input_path ${inputfile} \ --output_path ${outputfile} > ${filename}.log 2>&1 diff --git a/compiler/one-cmds/tests/one-quantize_neg_020.test b/compiler/one-cmds/tests/one-quantize_neg_020.test new file mode 100644 index 000000000..27b11c3e6 --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_neg_020.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# check error message is printed when qconfig file is not json + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Failed to decode" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.circle" +outputfile="./inception_v3.quantized.neg_020.circle" + +rm -rf ${outputfile}.log + +# run test +one-quantize \ +--input_dtype float32 \ +--quant_config one-quantize_neg_020.test \ +--input_path ${inputfile} \ +--output_path ${outputfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_008.cfg b/compiler/one-cmds/tests/onecc_008.cfg index 0be026e6e..020e274e1 100644 --- a/compiler/one-cmds/tests/onecc_008.cfg +++ b/compiler/one-cmds/tests/onecc_008.cfg @@ -15,7 +15,6 @@ output_path=test_onnx_model.circle [one-optimize] input_path=test_onnx_model.circle output_path=test_onnx_model.opt.circle -all=True remove_redundant_transpose=True [one-codegen] diff --git a/compiler/one-cmds/tests/onecc_009.cfg b/compiler/one-cmds/tests/onecc_009.cfg index a17ae59cb..86121c557 100644 --- a/compiler/one-cmds/tests/onecc_009.cfg +++ b/compiler/one-cmds/tests/onecc_009.cfg @@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle [one-optimize] input_path=onnx_conv2d_conv2d.circle output_path=onnx_conv2d_conv2d.opt.circle -all=True remove_redundant_transpose=True convert_nchw_to_nhwc=True diff --git a/compiler/one-cmds/tests/onecc_024.cfg b/compiler/one-cmds/tests/onecc_024.cfg new file mode 100644 index 000000000..7b4b1a80a --- /dev/null +++ b/compiler/one-cmds/tests/onecc_024.cfg @@ -0,0 +1,22 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-import-onnx=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle 
+input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v1 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle +make_batchnorm_gamma_positive=False diff --git a/compiler/one-cmds/tests/onecc_024.test b/compiler/one-cmds/tests/onecc_024.test new file mode 100644 index 000000000..1f5daa13e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_024.test @@ -0,0 +1,77 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use `OONECC_024` optimization option + +: ' +This test assumes below directories. + +[one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test # pwd +' + +OPT_ALREADY_EXIST=true + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +clean_envir() +{ + rm -rf ../optimization/OONECC_024.cfg + if [ "$OPT_ALREADY_EXIST" = false ]; then + rm -rf ../optimization + fi +} + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + clean_envir + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_024.cfg" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +if [ ! 
-d "../optimization" ]; then + mkdir -p ../optimization + OPT_ALREADY_EXIST=false +fi + +cp OONECC_024.cfg ../optimization + +# run test +LUCI_LOG=5 onecc -C ${configfile} -OONECC_024 > ${filename}.log 2>&1 + +clean_envir + +if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/onecc_025.cfg b/compiler/one-cmds/tests/onecc_025.cfg new file mode 100644 index 000000000..4776ea80e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_025.cfg @@ -0,0 +1,20 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle diff --git a/compiler/one-cmds/tests/onecc_025.test b/compiler/one-cmds/tests/onecc_025.test new file mode 100644 index 000000000..396f40cea --- /dev/null +++ b/compiler/one-cmds/tests/onecc_025.test @@ -0,0 +1,40 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# one-import-tf -> one-optimize with the configuration file that includes `onecc` section + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_001.cfg" +outputfile="inception_v3.opt.circle" + +# run test +onecc -C ${configfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_026.cfg b/compiler/one-cmds/tests/onecc_026.cfg new file mode 100644 index 000000000..c27a13654 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_026.cfg @@ -0,0 +1,16 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=True +one-pack=False +one-codegen=False + +[one-quantize] +input_path=inception_v3.circle +output_path=inception_v3.onecc_026.q.circle +input_data=inception_v3_test_data.h5 +evaluate_result=True +test_data=inception_v3_test_data.h5 +print_mpeir=True diff --git a/compiler/one-cmds/tests/onecc_026.test b/compiler/one-cmds/tests/onecc_026.test new file mode 100644 index 000000000..84cfa4146 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_026.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +configfile="onecc_026.cfg" +outputfile="inception_v3.onecc_026.q.circle" + +rm -rf ${outputfile} + +# run test +onecc -C ${configfile} > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/onecc_027.cfg b/compiler/one-cmds/tests/onecc_027.cfg new file mode 100644 index 000000000..d3f6b5e82 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_027.cfg @@ -0,0 +1,15 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-import-onnx=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False +one-profile=False +one-infer=True + +[one-infer] +backend=dummy +command=test_onnx_model.bin diff --git a/compiler/one-cmds/tests/onecc_027.test b/compiler/one-cmds/tests/onecc_027.test new file mode 100644 index 000000000..e727359ef --- /dev/null +++ b/compiler/one-cmds/tests/onecc_027.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# one-infer + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-infer + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_027.cfg" + +# copy dummy-infer to bin folder +cp dummy-infer ../bin/dummy-infer + +# run test +onecc -C ${configfile} > ${filename}.log + +rm -rf ../bin/dummy-infer + +if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 +fi + +trap_err_onexit diff --git a/compiler/one-cmds/tests/onecc_028.test b/compiler/one-cmds/tests/onecc_028.test new file mode 100644 index 000000000..10ce1583b --- /dev/null +++ b/compiler/one-cmds/tests/onecc_028.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-optimize -> one-pack + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_028.workflow.json" +outputfile="inception_v3_pkg" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_028.workflow.json b/compiler/one-cmds/tests/onecc_028.workflow.json new file mode 100644 index 000000000..84bfd01fa --- /dev/null +++ b/compiler/one-cmds/tests/onecc_028.workflow.json @@ -0,0 +1,37 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF", + "OPTIMIZE", + "PACK" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "PACK": { + "one-cmd": "one-pack", + "commands": { + "input_path": "inception_v3.opt.circle", + "output_path": "inception_v3_pkg" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_029.test b/compiler/one-cmds/tests/onecc_029.test new file mode 100644 index 000000000..9bab1a1ee --- /dev/null +++ b/compiler/one-cmds/tests/onecc_029.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run a workflow where one-import-tf -> one-quantize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_029.workflow.json" +outputfile="inception_v3.quantized.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_029.workflow.json b/compiler/one-cmds/tests/onecc_029.workflow.json new file mode 100644 index 000000000..65c9ea662 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_029.workflow.json @@ -0,0 +1,30 @@ +{ + "workflows": [ + "QUANTIZE_WORKFLOW" + ], + "QUANTIZE_WORKFLOW": { + "steps": [ + "IMPORT_TF", + "QUANTIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.quantized.circle", + "input_data": "inception_v3_test_data.h5" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_030.test b/compiler/one-cmds/tests/onecc_030.test new file mode 100644 index 000000000..c0aa56a51 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_030.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_030.workflow.json" +outputfile="sample.tvn" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_030.workflow.json b/compiler/one-cmds/tests/onecc_030.workflow.json new file mode 100644 index 000000000..111a1b034 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_030.workflow.json @@ -0,0 +1,29 @@ +{ + "workflows": [ + "codegen_wf" + ], + "codegen_wf": { + "steps": [ + "import_tf", + "codegen" + ], + "import_tf": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o sample.tvn inception_v3.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_031.test b/compiler/one-cmds/tests/onecc_031.test new file mode 100644 index 000000000..7a1c670c8 --- /dev/null +++ 
b/compiler/one-cmds/tests/onecc_031.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tflite -> one-optimize -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_031.workflow.json" +outputfile="sample.tvn" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_031.workflow.json b/compiler/one-cmds/tests/onecc_031.workflow.json new file mode 100644 index 000000000..83d52b942 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_031.workflow.json @@ -0,0 +1,33 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "codegen" + ], + "import": { + "one-cmd": "one-import-tflite", + "commands": { + "input_path": "inception_v3.tflite", + "output_path": "inception_v3.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o sample.tvn inception_v3.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_032.test b/compiler/one-cmds/tests/onecc_032.test new file mode 100644 index 000000000..89b6c41a5 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_032.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_032.workflow.json" +outputfile="sample.tvn" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_032.workflow.json b/compiler/one-cmds/tests/onecc_032.workflow.json new file mode 100644 index 000000000..08d3f0f5c --- /dev/null +++ b/compiler/one-cmds/tests/onecc_032.workflow.json @@ -0,0 +1,42 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "quantize", + "codegen" + ], + "import": { + "one-cmd": "one-import-tflite", + "commands": { + "input_path": "inception_v3.tflite", + "output_path": "inception_v3.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "quantize": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.quantized.circle", + "input_data": "inception_v3_test_data.h5" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o sample.tvn inception_v3.quantized.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_033.test b/compiler/one-cmds/tests/onecc_033.test new file mode 100644 index 000000000..635582f61 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_033.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-pack + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_033.workflow.json" +outputfile="inception_v3_pkg" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_033.workflow.json b/compiler/one-cmds/tests/onecc_033.workflow.json new file mode 100644 index 000000000..01233ffd9 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_033.workflow.json @@ -0,0 +1,42 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "quantize", + "pack" + ], + "import": { + "one-cmd": "one-import-tflite", + "commands": { + "input_path": "inception_v3.tflite", + "output_path": "inception_v3.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + }, + "quantize": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.quantized.circle", + "input_data": "inception_v3_test_data.h5" + } + }, + "pack": { + "one-cmd": "one-pack", + "commands": { + 
"input_path": "inception_v3.quantized.circle", + "output_path": "inception_v3_pkg" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_034.test b/compiler/one-cmds/tests/onecc_034.test new file mode 100644 index 000000000..e76654809 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_034.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-onnx -> one-optimize -> one-codegen + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + rm -rf ../bin/dummy-compile + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_034.workflow.json" +outputfile="onnx_conv2d_conv2d.bin" + +rm -rf ${outputfile} + +# copy dummy-compile to bin folder +cp dummy-compile ../bin/dummy-compile + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +rm -rf ../bin/dummy-compile + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_034.workflow.json b/compiler/one-cmds/tests/onecc_034.workflow.json new file mode 100644 index 000000000..bc3cbbf58 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_034.workflow.json @@ -0,0 +1,35 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import", + "optimize", + "codegen" + ], + "import": { + "one-cmd": "one-import-onnx", + "commands": { + "input_path": "onnx_conv2d_conv2d.onnx", + "output_path": "onnx_conv2d_conv2d.circle" + } + }, + "optimize": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "onnx_conv2d_conv2d.circle", + "output_path": "onnx_conv2d_conv2d.opt.circle", + "remove_redundant_transpose": "True", + "convert_nchw_to_nhwc": "True" + } + }, + "codegen": { + "one-cmd": "one-codegen", + "commands": { + "backend": "dummy", + "command": "-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_035.test b/compiler/one-cmds/tests/onecc_035.test new file mode 100644 index 000000000..762cdd31a --- /dev/null +++ b/compiler/one-cmds/tests/onecc_035.test @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run a workflow where one-import-tf generates intermediate files + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_035.workflow.json" +outputfile="inception_v3.alt.circle" +intermfile="inception_v3.alt.tflite" + +rm -rf ${outputfile} +rm -rf ${intermfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi +if [[ ! -s "${intermfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_035.workflow.json b/compiler/one-cmds/tests/onecc_035.workflow.json new file mode 100644 index 000000000..6abf1f32b --- /dev/null +++ b/compiler/one-cmds/tests/onecc_035.workflow.json @@ -0,0 +1,22 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import" + ], + "import": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.alt.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v1", + "save_intermediate": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_036.test b/compiler/one-cmds/tests/onecc_036.test new file mode 100644 index 000000000..865255e9f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_036.test @@ -0,0 +1,47 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-onnx generates intermediate files + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_036.workflow.json" +outputfile="test_onnx_model.circle" +intermfile="test_onnx_model.tflite" + +rm -rf ${outputfile} +rm -rf ${intermfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi +if [[ ! -s "${intermfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_036.workflow.json b/compiler/one-cmds/tests/onecc_036.workflow.json new file mode 100644 index 000000000..5fa29edb5 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_036.workflow.json @@ -0,0 +1,18 @@ +{ + "workflows": [ + "wf" + ], + "wf": { + "steps": [ + "import" + ], + "import": { + "one-cmd": "one-import-onnx", + "commands": { + "input_path": "test_onnx_model.onnx", + "output_path": "test_onnx_model.circle", + "save_intermediate": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_037.test b/compiler/one-cmds/tests/onecc_037.test new file mode 100644 index 000000000..52ea9e4c7 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_037.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-optimize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_037.workflow.json" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_037.workflow.json b/compiler/one-cmds/tests/onecc_037.workflow.json new file mode 100644 index 000000000..3317fb27a --- /dev/null +++ b/compiler/one-cmds/tests/onecc_037.workflow.json @@ -0,0 +1,29 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "IMPORT", + "OPTIMIZE" + ], + "IMPORT": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_038.test b/compiler/one-cmds/tests/onecc_038.test new file mode 100644 index 000000000..6b8f7cf64 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_038.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-import-tf -> one-quantize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_038.workflow.json" +outputfile="inception_v3.list.quantized.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_038.workflow.json b/compiler/one-cmds/tests/onecc_038.workflow.json new file mode 100644 index 000000000..5ac515d00 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_038.workflow.json @@ -0,0 +1,31 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "IMPORT", + "QUANTIZE" + ], + "IMPORT": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.list.quantized.circle", + "input_data": "datalist.txt", + "input_data_format": "list" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_039.test 
b/compiler/one-cmds/tests/onecc_039.test new file mode 100644 index 000000000..7db9d901c --- /dev/null +++ b/compiler/one-cmds/tests/onecc_039.test @@ -0,0 +1,48 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow where one-quantize quantizes the model and evaluates the result + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +workflowfile="onecc_039.workflow.json" +outputfile="inception_v3.onecc_039.q.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +check_message diff --git a/compiler/one-cmds/tests/onecc_039.workflow.json b/compiler/one-cmds/tests/onecc_039.workflow.json new file mode 100644 index 000000000..55ef56988 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_039.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "QUANTIZE" + ], + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_039.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + 
"test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_040.cfg b/compiler/one-cmds/tests/onecc_040.cfg new file mode 100644 index 000000000..4776ea80e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_040.cfg @@ -0,0 +1,20 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle diff --git a/compiler/one-cmds/tests/onecc_040.test b/compiler/one-cmds/tests/onecc_040.test new file mode 100644 index 000000000..2f7567730 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_040.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# run a workflow with cfg reference + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_040.workflow.json" +outputfile="inception_v3.opt.circle" + +rm -rf ${outputfile} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/onecc_040.workflow.json b/compiler/one-cmds/tests/onecc_040.workflow.json new file mode 100644 index 000000000..2d4119b21 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_040.workflow.json @@ -0,0 +1,10 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "cfg-reference": { + "path": "onecc_040.cfg" + } + } +} diff --git a/compiler/one-cmds/tests/onecc_041.cfg b/compiler/one-cmds/tests/onecc_041.cfg new file mode 100644 index 000000000..16135f074 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_041.cfg @@ -0,0 +1,16 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3_without_opt.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 diff --git a/compiler/one-cmds/tests/onecc_041.test b/compiler/one-cmds/tests/onecc_041.test new file mode 100644 index 000000000..791dd12ca --- /dev/null +++ b/compiler/one-cmds/tests/onecc_041.test @@ -0,0 +1,58 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# run multiple workflows (INFER runs after WITHOUT_OPT and WITH_OPT) and check both models are inferred + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +check_message() +{ + if grep -q "Do inference of inception_v3_without_opt\.circle" "${filename}.log" && + grep -q "Do inference of inception_v3\.opt\.circle" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + trap_err_onexit +} + +trap trap_err_onexit ERR + +workflowfile="onecc_041.workflow.json" +outputfile1="inception_v3_without_opt.circle" +outputfile2="inception_v3.opt.circle" + +cp dummy-inferV2 ../bin/dummy-inferV2 + +rm -rf ${outputfile1} ${outputfile2} + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +rm -rf ../bin/dummy-inferV2 + +if [[ ! -s "${outputfile1}" ]] || [[ ! -s "${outputfile2}" ]]; then + trap_err_onexit +fi + +check_message diff --git a/compiler/one-cmds/tests/onecc_041.workflow.json b/compiler/one-cmds/tests/onecc_041.workflow.json new file mode 100644 index 000000000..7dfc1c664 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_041.workflow.json @@ -0,0 +1,61 @@ +{ + "workflows": [ + "WITHOUT_OPT", + "WITH_OPT", + "INFER" + ], + "INFER": { + "run-after": [ + "WITHOUT_OPT", + "WITH_OPT" + ], + "steps": [ + "INFER1", + "INFER2" + ], + "INFER1": { + "one-cmd": "one-infer", + "commands" : { + "driver": "dummy-inferV2", + "command": "inception_v3_without_opt.circle" + } + }, + "INFER2": { + "one-cmd": "one-infer", + "commands": { + "driver": "dummy-inferV2", + "command": "inception_v3.opt.circle" + } + } + }, + "WITHOUT_OPT": { + "cfg-reference": { + "path": "onecc_041.cfg" + } + }, + "WITH_OPT": { + "steps": [ + "IMPORT_TF", + "OPTIMIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + 
"one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + } + } + +} diff --git a/compiler/one-cmds/tests/onecc_neg_009.test b/compiler/one-cmds/tests/onecc_neg_009.test new file mode 100644 index 000000000..54dd129e4 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_009.test @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Valid optimization option but invalid configuration file path + +: ' +This test assumes below directories. + +[one hierarchy] + one + ├── backends + ├── bin + ├── doc + ├── include + ├── lib + ├── optimization + └── test # pwd +' + +OPT_ALREADY_EXIST=true + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + rm -rf ../optimization/OONECC_NEG_009.cfg + if [ "$OPT_ALREADY_EXIST" = false ]; then + rm -rf ../optimization + fi + if grep -q "Not found given configuration file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +if [ ! -d "../optimization" ]; then + mkdir -p ../optimization + OPT_ALREADY_EXIST=false +fi + + +touch ../optimization/OONECC_NEG_009.cfg + +configfile=".." 
+ +# run test +onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_010.test b/compiler/one-cmds/tests/onecc_neg_010.test new file mode 100644 index 000000000..ddad5e6de --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_010.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Invalid optimization option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Invalid optimization option" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile=".." 
+ +# run test +onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_011.cfg b/compiler/one-cmds/tests/onecc_neg_011.cfg new file mode 100644 index 000000000..b5873245b --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_011.cfg @@ -0,0 +1,13 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=True +one-quantize=False +one-pack=False +one-codegen=False + +[one-optimize] +input_path=inception_v3.circle +output_path=inception_v3.opt.circle +wrong_opt=True diff --git a/compiler/one-cmds/tests/onecc_neg_011.test b/compiler/one-cmds/tests/onecc_neg_011.test new file mode 100644 index 000000000..3f043a77e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_011.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# generate error for unrecognized optimization option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "following arguments are unrecognized" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_neg_011.cfg" + +# run test +onecc -C ${configfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_012.cfg b/compiler/one-cmds/tests/onecc_neg_012.cfg new file mode 100644 index 000000000..fdc73ef43 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_012.cfg @@ -0,0 +1,15 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False +one-profile=False +one-infer=True + +[one-infer] +driver=dummy-infer +backend=dummy +command="dummy arguments" diff --git a/compiler/one-cmds/tests/onecc_neg_012.test b/compiler/one-cmds/tests/onecc_neg_012.test new file mode 100644 index 000000000..9feca5f54 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_012.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Check driver and backend option is mutually exclusive + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "\-d and -b options are mutually exclusive" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_neg_012.cfg" + +# run test +onecc -C ${configfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_013.test b/compiler/one-cmds/tests/onecc_neg_013.test new file mode 100644 index 000000000..0dd8a0fdd --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_013.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# negative usage with missing workflow file + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not found given workflow file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_013.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_014.test b/compiler/one-cmds/tests/onecc_neg_014.test new file mode 100644 index 000000000..2ed5dcbf5 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_014.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# invalid workflow file + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Invalid workflow file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_014.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_014.workflow.json b/compiler/one-cmds/tests/onecc_neg_014.workflow.json new file mode 100644 index 000000000..8d4fd431e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_014.workflow.json @@ -0,0 +1,3 @@ +{ + INVALID JSON FILE +} diff --git a/compiler/one-cmds/tests/onecc_neg_015.test b/compiler/one-cmds/tests/onecc_neg_015.test new file mode 100644 index 000000000..079ba677a --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_015.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not found" "${filename}.log" && + grep -q "key in workflow file" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_015.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_015.workflow.json b/compiler/one-cmds/tests/onecc_neg_015.workflow.json new file mode 100644 index 000000000..4cb752e4e --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_015.workflow.json @@ -0,0 +1,21 @@ +{ + "workflowsssssss": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "steps": [ + "QUANTIZE" + ], + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_026.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + "test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_016.test b/compiler/one-cmds/tests/onecc_neg_016.test new file mode 100644 index 000000000..c52763f47 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_016.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Not found" "${filename}.log" && + grep -q "key listed in" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_016.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_016.workflow.json b/compiler/one-cmds/tests/onecc_neg_016.workflow.json new file mode 100644 index 000000000..c929cf38c --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_016.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOWWWWW": { + "steps": [ + "QUANTIZE" + ], + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_026.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + "test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_017.test b/compiler/one-cmds/tests/onecc_neg_017.test new file mode 100644 index 000000000..2f173d2f6 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_017.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Each workflow should have either" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_017.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_017.workflow.json b/compiler/one-cmds/tests/onecc_neg_017.workflow.json new file mode 100644 index 000000000..22f1415e9 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_017.workflow.json @@ -0,0 +1,18 @@ +{ + "workflows": [ + "SIMPLE_WORKFLOW" + ], + "SIMPLE_WORKFLOW": { + "QUANTIZE": { + "one-cmd": "one-quantize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.onecc_026.q.circle", + "input_data": "inception_v3_test_data.h5", + "evaluate_result": "True", + "test_data": "inception_v3_test_data.h5", + "print_mpeir": "True" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_018.test b/compiler/one-cmds/tests/onecc_neg_018.test new file mode 100644 index 000000000..bc2297ed0 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_018.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "are exclusive key" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_018.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_018.workflow.json b/compiler/one-cmds/tests/onecc_neg_018.workflow.json new file mode 100644 index 000000000..58cb88e17 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_018.workflow.json @@ -0,0 +1,24 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF" + ], + "cfg-reference": { + "path": "/path/to/ini/format/file" + }, + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_019.test b/compiler/one-cmds/tests/onecc_neg_019.test new file mode 100644 index 000000000..11ef3a9ee --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_019.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Each step should have" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_019.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_019.workflow.json b/compiler/one-cmds/tests/onecc_neg_019.workflow.json new file mode 100644 index 000000000..aedeeecca --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_019.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmddddddddd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_020.test b/compiler/one-cmds/tests/onecc_neg_020.test new file mode 100644 index 000000000..7f5073d82 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_020.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflow file has invalid key + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Each step should have" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_020.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_020.workflow.json b/compiler/one-cmds/tests/onecc_neg_020.workflow.json new file mode 100644 index 000000000..d3446d38f --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_020.workflow.json @@ -0,0 +1,21 @@ +{ + "workflows": [ + "MY_WORKFLOW" + ], + "MY_WORKFLOW": { + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commandssssssssss": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_021.test b/compiler/one-cmds/tests/onecc_neg_021.test new file mode 100644 index 000000000..e9d4baaee --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_021.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflows have a cycle + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Workflows should not have a cycle" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_021.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_021.workflow.json b/compiler/one-cmds/tests/onecc_neg_021.workflow.json new file mode 100644 index 000000000..6d21111af --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_021.workflow.json @@ -0,0 +1,44 @@ +{ + "workflows": [ + "CYCLE_WF1", + "CYCLE_WF2" + ], + "CYCLE_WF1": { + "run-after": [ + "CYCLE_WF2" + ], + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + }, + "CYCLE_WF2": { + "run-after": [ + "CYCLE_WF1" + ], + "steps": [ + "IMPORT_TF" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": 
"1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_022.cfg b/compiler/one-cmds/tests/onecc_neg_022.cfg new file mode 100644 index 000000000..16135f074 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_022.cfg @@ -0,0 +1,16 @@ +[onecc] +one-import-tf=True +one-import-tflite=False +one-import-bcq=False +one-optimize=False +one-quantize=False +one-pack=False +one-codegen=False + +[one-import-tf] +input_path=inception_v3.pb +output_path=inception_v3_without_opt.circle +input_arrays=input +input_shapes=1,299,299,3 +output_arrays=InceptionV3/Predictions/Reshape_1 +converter_version=v2 diff --git a/compiler/one-cmds/tests/onecc_neg_022.test b/compiler/one-cmds/tests/onecc_neg_022.test new file mode 100644 index 000000000..540071729 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_022.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# workflows have a cycle + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Workflows should not have a cycle" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_022.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_022.workflow.json b/compiler/one-cmds/tests/onecc_neg_022.workflow.json new file mode 100644 index 000000000..2e056acf1 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_022.workflow.json @@ -0,0 +1,63 @@ +{ + "workflows": [ + "WITHOUT_OPT", + "WITH_OPT", + "INFER" + ], + "INFER": { + "run-after": [ + "WITHOUT_OPT", + "WITH_OPT" + ], + "steps": [ + "INFER1", + "INFER2" + ], + "INFER1": { + "one-cmd": "one-infer", + "commands" : { + "driver": "dummy-inferV2", + "command": "inception_v3_without_opt.circle" + } + }, + "INFER2": { + "one-cmd": "one-infer", + "commands": { + "driver": "dummy-inferV2", + "command": "inception_v3.opt.circle" + } + } + }, + "WITHOUT_OPT": { + "cfg-reference": { + "path": "onecc_041.cfg" + } + }, + "WITH_OPT": { + "run-after": [ + "WITHOUT_OPT" + ], + "steps": [ + "IMPORT_TF", + "OPTIMIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle" + } + } + } +} diff --git a/compiler/one-cmds/tests/onecc_neg_023.test b/compiler/one-cmds/tests/onecc_neg_023.test new file mode 100644 index 000000000..09717e8ad --- /dev/null +++ 
b/compiler/one-cmds/tests/onecc_neg_023.test @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# workflows have wrong optimize option + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "Change outputs failed" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +workflowfile="onecc_neg_023.workflow.json" + +# run test +onecc -W ${workflowfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_neg_023.workflow.json b/compiler/one-cmds/tests/onecc_neg_023.workflow.json new file mode 100644 index 000000000..056e704fd --- /dev/null +++ b/compiler/one-cmds/tests/onecc_neg_023.workflow.json @@ -0,0 +1,30 @@ +{ + "workflows": [ + "WITH_OPT" + ], + "WITH_OPT": { + "steps": [ + "IMPORT_TF", + "OPTIMIZE" + ], + "IMPORT_TF": { + "one-cmd": "one-import-tf", + "commands": { + "input_path": "inception_v3.pb", + "output_path": "inception_v3.circle", + "input_arrays": "input", + "input_shapes": "1,299,299,3", + "output_arrays": "InceptionV3/Predictions/Reshape_1", + "converter_version": "v2" + } + }, + "OPTIMIZE": { + "one-cmd": "one-optimize", + "commands": { + "input_path": "inception_v3.circle", + "output_path": "inception_v3.opt.circle", + "change_outputs": 
"non_existing_node_name" + } + } + } +} diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh index c80c59834..c171cfe01 100644 --- a/compiler/one-cmds/tests/prepare_test_materials.sh +++ b/compiler/one-cmds/tests/prepare_test_materials.sh @@ -91,6 +91,20 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444 fi +if [[ ! -s "reshape_matmul.onnx" ]]; then + rm -rf reshape_matmul.zip + wget https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip + unzip reshape_matmul.zip + # https://github.com/Samsung/ONE/issues/9405#issuecomment-1180198137 +fi + +if [[ ! -s "Net_InstanceNorm_003.part" ]]; then + rm -rf Net_InstanceNorm_003.zip + wget https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip + unzip Net_InstanceNorm_003.zip + # https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257 +fi + function files_missing() { condition="test " diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py index be0322aca..d204447fd 100644 --- a/compiler/one-cmds/utils.py +++ b/compiler/one-cmds/utils.py @@ -47,6 +47,25 @@ def _add_default_arg(parser): parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS) +def _add_default_arg_no_CS(parser): + """ + This adds -v -V args only (no -C nor -S) + """ + # version + parser.add_argument( + '-v', + '--version', + action='store_true', + help='show program\'s version number and exit') + + # verbose + parser.add_argument( + '-V', + '--verbose', + action='store_true', + help='output additional information to stdout or stderr') + + def is_accumulated_arg(arg, driver): if driver == "one-quantize": accumulables = [ @@ -62,6 +81,43 @@ def _is_valid_attr(args, attr): return hasattr(args, attr) and getattr(args, attr) +class Command: + def __init__(self, driver, args, log_file): + self.cmd = [driver] + self.driver = driver + self.args = args + self.log_file = log_file + 
+ # Add option if attrs are valid + # Option values are collected from self.args + def add_option_with_valid_args(self, option, attrs): + for attr in attrs: + if not _is_valid_attr(self.args, attr): + return self + self.cmd.append(option) + for attr in attrs: + self.cmd.append(getattr(self.args, attr)) + return self + + # Add option and values without any condition + def add_option_with_values(self, option, values): + self.cmd.append(option) + for value in values: + self.cmd.append(value) + return self + + # Add option with no argument (ex: --verbose) if attr is valid + def add_noarg_option_if_valid_arg(self, option, attr): + if _is_valid_attr(self.args, attr): + self.cmd.append(option) + return self + + # Run cmd and save logs + def run(self): + self.log_file.write((' '.join(self.cmd) + '\n').encode()) + _run(self.cmd, err_prefix=self.driver, logfile=self.log_file) + + def _parse_cfg_and_overwrite(config_path, section, args): """ parse given section of configuration file and set the values of args. 
@@ -153,8 +209,7 @@ def _run(cmd, err_prefix=None, logfile=None): err_prefix: prefix to be put before every stderr lines logfile: file stream to which both of stdout and stderr lines will be written """ - with subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p: + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p: import select inputs = set([p.stdout, p.stderr]) while inputs: diff --git a/compiler/onnx-tools/CMakeLists.txt b/compiler/onnx-tools/CMakeLists.txt index ac4500e0e..5935cdfbe 100644 --- a/compiler/onnx-tools/CMakeLists.txt +++ b/compiler/onnx-tools/CMakeLists.txt @@ -18,4 +18,10 @@ foreach(ONNX_TOOL IN ITEMS ${ONNX_TOOL_FILES}) add_custom_target(${ONNX_TOOL_TARGET} ALL DEPENDS ${ONNX_TOOL_BIN}) + install(FILES ${ONNX_TOOL_BIN} + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + DESTINATION bin) + endforeach(ONNX_TOOL) diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt index 51fd9a391..96dfc8687 100644 --- a/compiler/pota-quantization-value-test/CMakeLists.txt +++ b/compiler/pota-quantization-value-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + unset(QUANTIZATION_VALUE_TEST) unset(QUANTIZATION_VALUE_TEST_WITH_PARAM) unset(QUANTIZATION_CONFIG_VALUE_TEST) diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt index 31b906142..636361405 100644 --- a/compiler/record-minmax-conversion-test/CMakeLists.txt +++ b/compiler/record-minmax-conversion-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + unset(RECORD_MINMAX_CONVERSION_TEST) macro(addTest NAME) diff --git a/compiler/record-minmax/driver/Driver.cpp b/compiler/record-minmax/driver/Driver.cpp index c9f1d0ca7..faa402f01 100644 --- 
a/compiler/record-minmax/driver/Driver.cpp +++ b/compiler/record-minmax/driver/Driver.cpp @@ -34,62 +34,33 @@ int entry(const int argc, char **argv) arser::Arser arser( "Embedding min/max values of activations to the circle model for post-training quantization"); - arser.add_argument("--version") - .nargs(0) - .required(false) - .default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); - arser.add_argument("--input_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("Input model filepath"); + arser.add_argument("--input_model").required(true).help("Input model filepath"); arser.add_argument("--input_data") - .nargs(1) - .type(arser::DataType::STR) - .required(false) .help("Input data filepath. If not given, record-minmax will run with randomly generated data. " "Note that the random dataset does not represent inference workload, leading to poor " "model accuracy."); - arser.add_argument("--output_model") - .nargs(1) - .type(arser::DataType::STR) - .required(true) - .help("Output model filepath"); + arser.add_argument("--output_model").required(true).help("Output model filepath"); arser.add_argument("--min_percentile") - .nargs(1) .type(arser::DataType::FLOAT) .help("Record n'th percentile of min"); arser.add_argument("--max_percentile") - .nargs(1) .type(arser::DataType::FLOAT) .help("Record n'th percentile of max"); - arser.add_argument("--mode") - .nargs(1) - .type(arser::DataType::STR) - .help("Record mode. percentile (default) or moving_average"); + arser.add_argument("--mode").help("Record mode. percentile (default) or moving_average"); arser.add_argument("--input_data_format") - .nargs(1) - .type(arser::DataType::STR) .help("Input data format. 
h5/hdf5 (default) or list/filelist"); arser.add_argument("--generate_profile_data") .nargs(0) - .required(false) .default_value(false) .help("This will turn on profiling data generation."); diff --git a/compiler/record-minmax/include/RecordFunction.h b/compiler/record-minmax/include/RecordFunction.h index ba199d071..5b993e4b3 100644 --- a/compiler/record-minmax/include/RecordFunction.h +++ b/compiler/record-minmax/include/RecordFunction.h @@ -18,7 +18,7 @@ #include <cassert> #include <algorithm> #include <cmath> -#include <numeric> +#include <limits> #include <stdexcept> namespace record_minmax diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp index 8288d3e5e..e6edbdca9 100644 --- a/compiler/record-minmax/src/MinMaxObserver.cpp +++ b/compiler/record-minmax/src/MinMaxObserver.cpp @@ -18,6 +18,7 @@ #include <luci/IR/CircleOpcode.h> +#include <limits> #include <math.h> using DataType = luci_interpreter::DataType; @@ -75,7 +76,7 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node, // Reshape changes only shape of input tensor, efficiently is it a no-op. return; default: - throw std::runtime_error("Tensor's data type is not float"); + throw std::runtime_error("Tensor's data type is not float. 
" + node->name()); } } diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp index 10a14516f..6dbf98dc6 100644 --- a/compiler/record-minmax/src/RecordMinMax.cpp +++ b/compiler/record-minmax/src/RecordMinMax.cpp @@ -186,7 +186,13 @@ void RecordMinMax::initialize(const std::string &input_model_path) throw std::runtime_error("Failed to verify circle '" + input_model_path + "'"); } - _module = luci::Importer().importModule(circle::GetModel(model_data.data())); + const circle::Model *circle_model = circle::GetModel(model_data.data()); + if (circle_model == nullptr) + { + throw std::runtime_error("Failed to load '" + input_model_path + "'"); + } + + _module = luci::Importer().importModule(circle_model); if (_module == nullptr) { diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt index f57102f1f..8dcf4c2b8 100644 --- a/compiler/souschef/CMakeLists.txt +++ b/compiler/souschef/CMakeLists.txt @@ -1,13 +1,20 @@ nnas_find_package(Protobuf QUIET) +nnas_find_package(Fp16Source QUIET) if(NOT Protobuf_FOUND) message(STATUS "Build souschef: FAILED (missing Protobuf)") return() endif(NOT Protobuf_FOUND) +if(NOT Fp16Source_FOUND) + message(STATUS "Build souschef: FAILED (missing Fp16Source)") + return() +endif(NOT Fp16Source_FOUND) + file(GLOB_RECURSE SOURCES "src/*.cpp") add_library(souschef STATIC ${SOURCES}) set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(souschef PRIVATE ${Fp16Source_DIR}/include) target_include_directories(souschef PUBLIC include) target_link_libraries(souschef PUBLIC libprotobuf) diff --git a/compiler/souschef/include/souschef/Data/Explicit.h b/compiler/souschef/include/souschef/Data/Explicit.h index 7cbb773da..434d0ec2c 100644 --- a/compiler/souschef/include/souschef/Data/Explicit.h +++ b/compiler/souschef/include/souschef/Data/Explicit.h @@ -96,6 +96,41 @@ template <typename T> struct ExplicitDataChefFactory : public 
DataChefFactory } }; +class ExplicitFloat16DataChef final : public DataChef +{ +public: + ExplicitFloat16DataChef() + { + // DO NOTHING + } + +public: + std::vector<uint8_t> generate(int32_t count) const override; + +public: + void insert(const float &value) { _values.emplace_back(value); } + +private: + // NOTE store values in float but will convert to uint16_t in generate() + std::vector<float> _values; +}; + +struct ExplicitFloat16DataChefFactory : public DataChefFactory +{ + std::unique_ptr<DataChef> create(const Arguments &args) const + { + std::unique_ptr<ExplicitFloat16DataChef> res{new ExplicitFloat16DataChef}; + + for (uint32_t n = 0; n < args.count(); ++n) + { + auto const value = to_number<float>(args.value(n)); + res->insert(value); + } + + return std::move(res); + } +}; + } // namespace souschef #endif // __SOUSCHEF_DATA_EXPLICIT_H__ diff --git a/compiler/souschef/include/souschef/Data/Gaussian.h b/compiler/souschef/include/souschef/Data/Gaussian.h index 8093b4c41..c9ac571f9 100644 --- a/compiler/souschef/include/souschef/Data/Gaussian.h +++ b/compiler/souschef/include/souschef/Data/Gaussian.h @@ -41,6 +41,22 @@ private: float _stddev; }; +class GaussianFloat16DataChef final : public DataChef +{ +public: + GaussianFloat16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev} + { + // DO NOTHING + } + +public: + std::vector<uint8_t> generate(int32_t count) const override; + +private: + float _mean; + float _stddev; +}; + class GaussianInt32DataChef final : public DataChef { public: @@ -109,6 +125,11 @@ struct GaussianUint8DataChefFactory : public DataChefFactory std::unique_ptr<DataChef> create(const Arguments &args) const; }; +struct GaussianFloat16DataChefFactory : public DataChefFactory +{ + std::unique_ptr<DataChef> create(const Arguments &args) const; +}; + } // namespace souschef #endif // __SOUSCHEF_DATA_GAUSSIAN_H__ diff --git a/compiler/souschef/src/Explicit.cpp b/compiler/souschef/src/Explicit.cpp index eb36cb7c3..3278ae3c3 100644 
--- a/compiler/souschef/src/Explicit.cpp +++ b/compiler/souschef/src/Explicit.cpp @@ -19,6 +19,8 @@ #include <string> #include <vector> +#include <fp16.h> + namespace souschef { @@ -74,4 +76,23 @@ void ExplicitDataChef<std::string>::write_value(std::vector<uint8_t> &res, int32 } } +std::vector<uint8_t> ExplicitFloat16DataChef::generate(int32_t count) const +{ + std::vector<uint8_t> res; + + for (uint32_t n = 0; n < count; ++n) + { + float const fvalue = (n < _values.size()) ? _values.at(n) : 0.0; + uint16_t const value = fp16_ieee_from_fp32_value(fvalue); + auto const arr = reinterpret_cast<const uint8_t *>(&value); + + for (uint32_t b = 0; b < sizeof(uint16_t); ++b) + { + res.emplace_back(arr[b]); + } + } + + return res; +} + } // namespace souschef diff --git a/compiler/souschef/src/Gaussian.cpp b/compiler/souschef/src/Gaussian.cpp index 32cbcff4d..53a62cabf 100644 --- a/compiler/souschef/src/Gaussian.cpp +++ b/compiler/souschef/src/Gaussian.cpp @@ -23,6 +23,8 @@ #include <cassert> #include <stdexcept> +#include <fp16.h> + namespace souschef { @@ -36,7 +38,7 @@ static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float s std::vector<uint8_t> res; constexpr float max_cap = std::numeric_limits<T>::max(); - constexpr float min_cap = std::numeric_limits<T>::min(); + constexpr float min_cap = std::numeric_limits<T>::lowest(); for (uint32_t n = 0; n < count; ++n) { float raw_value = dist(rand); @@ -69,6 +71,34 @@ std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const return generate_gaussian<float>(count, _mean, _stddev); } +std::vector<uint8_t> GaussianFloat16DataChef::generate(int32_t count) const +{ + auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count(); + auto seed = static_cast<std::minstd_rand::result_type>(time_stamp); + + std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)}; + std::normal_distribution<float> dist{_mean, _stddev}; + + std::vector<uint8_t> res; + + constexpr 
float max_cap = 1e9; + constexpr float min_cap = -1e9; + for (uint32_t n = 0; n < count; ++n) + { + float raw_value = dist(rand); + const float capped_value = std::max(min_cap, std::min(max_cap, raw_value)); + const uint16_t value = fp16_ieee_from_fp32_value(capped_value); + auto const arr = reinterpret_cast<const uint8_t *>(&value); + + for (uint32_t b = 0; b < sizeof(uint16_t); ++b) + { + res.emplace_back(arr[b]); + } + } + + return res; +} + std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const { return generate_gaussian<int32_t>(count, _mean, _stddev); @@ -136,4 +166,17 @@ std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments & return std::unique_ptr<DataChef>{new GaussianUint8DataChef{mean, stddev}}; } +std::unique_ptr<DataChef> GaussianFloat16DataChefFactory::create(const Arguments &args) const +{ + if (args.count() != 2) + { + throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"}; + } + + auto const mean = to_number<float>(args.value(0)); + auto const stddev = to_number<float>(args.value(1)); + + return std::unique_ptr<DataChef>{new GaussianFloat16DataChef{mean, stddev}}; +} + } // namespace souschef diff --git a/compiler/tf2circle-conversion-test/CMakeLists.txt b/compiler/tf2circle-conversion-test/CMakeLists.txt index 27f2463f3..79a39873b 100644 --- a/compiler/tf2circle-conversion-test/CMakeLists.txt +++ b/compiler/tf2circle-conversion-test/CMakeLists.txt @@ -128,6 +128,10 @@ list(APPEND TEST_DEPS "${TEST_CONFIG}") # This "tf2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase add_custom_target(tf2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + # Run tests add_test( NAME tf2circle_conversion_test diff --git a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt index 48b098e24..83596fade 100644 --- 
a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt +++ b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt @@ -132,6 +132,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}") # Generate dependencies add_custom_target(tf2circle_dredd_pb_deps ALL DEPENDS ${DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_test( NAME tf2circle_dredd_pb_test COMMAND diff --git a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt index 789e58535..427e57502 100644 --- a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt +++ b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt @@ -175,6 +175,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}") # Generate dependencies add_custom_target(tf2circle_dredd_pbtxt_deps ALL DEPENDS ${DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_test( NAME tf2circle_dredd_pbtxt_test COMMAND diff --git a/compiler/tf2circle-model-test/CMakeLists.txt b/compiler/tf2circle-model-test/CMakeLists.txt index 2fb82236a..ad776a62b 100644 --- a/compiler/tf2circle-model-test/CMakeLists.txt +++ b/compiler/tf2circle-model-test/CMakeLists.txt @@ -100,6 +100,10 @@ list(APPEND DEPS "${TEST_RUNNER_SCRIPT}") ### Generate dependencies add_custom_target(tf2circle_model_test_deps ALL DEPENDS ${DEPS}) +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + # NOTE This target is not built by default add_test( NAME tf2circle_model_test diff --git a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt index b75c50772..ac9f14d70 100644 --- a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt +++ b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt index 87cf7836f..95a296ef8 100644 --- a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt 
+++ b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tflite-value-pb-test/CMakeLists.txt b/compiler/tf2tflite-value-pb-test/CMakeLists.txt index 41974f72c..a6c451e0b 100644 --- a/compiler/tf2tflite-value-pb-test/CMakeLists.txt +++ b/compiler/tf2tflite-value-pb-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt index 2e76e21d3..fde3e60b4 100644 --- a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt +++ b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt index 0b4739374..97aa07fd3 100644 --- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt +++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nncc_find_resource(TensorFlowTests) # diff --git a/compiler/tf2tfliteV2/tf2tfliteV2.py b/compiler/tf2tfliteV2/tf2tfliteV2.py index 6b578ad53..2bcf55328 100755 --- a/compiler/tf2tfliteV2/tf2tfliteV2.py +++ b/compiler/tf2tfliteV2/tf2tfliteV2.py @@ -110,6 +110,12 @@ def _get_parser(): type=str, help="Names of the output arrays, comma-separated.") + # experimental options + parser.add_argument( + "--experimental_disable_batchmatmul_unfold", + action="store_true", + help="Experimental disable BatchMatMul unfold") + # Set default value parser.set_defaults(model_format="graph_def") return parser @@ -228,6 +234,9 @@ def _v2_convert(flags): keras_model = tf.keras.models.load_model(flags.input_path) 
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) + if flags.experimental_disable_batchmatmul_unfold: + converter._experimental_disable_batchmatmul_unfold = True + converter.allow_custom_ops = True converter.experimental_new_converter = True diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt index 9e1cb720f..2c6e3a147 100644 --- a/compiler/tfl-inspect/CMakeLists.txt +++ b/compiler/tfl-inspect/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_tflite) +if(NOT TARGET mio_tflite280) return() -endif(NOT TARGET mio_tflite) +endif(NOT TARGET mio_tflite280) set(DRIVER "driver/Driver.cpp") diff --git a/compiler/tfl-inspect/driver/Driver.cpp b/compiler/tfl-inspect/driver/Driver.cpp index 3e62e0ffb..8505ff4aa 100644 --- a/compiler/tfl-inspect/driver/Driver.cpp +++ b/compiler/tfl-inspect/driver/Driver.cpp @@ -35,7 +35,7 @@ int entry(int argc, char **argv) .nargs(0) .help("Dump Conv2D series weight operators in tflite file"); arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file"); - arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect"); + arser.add_argument("tflite").help("TFLite file to inspect"); try { diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt index 2fba335ea..5bead5bb4 100644 --- a/compiler/tfl-verify/CMakeLists.txt +++ b/compiler/tfl-verify/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_tflite) +if(NOT TARGET mio_tflite280) return() -endif(NOT TARGET mio_tflite) +endif(NOT TARGET mio_tflite280) file(GLOB_RECURSE SOURCES "src/*.cpp") diff --git a/compiler/tfl-verify/src/Driver.cpp b/compiler/tfl-verify/src/Driver.cpp index 6d1897607..62345494b 100644 --- a/compiler/tfl-verify/src/Driver.cpp +++ b/compiler/tfl-verify/src/Driver.cpp @@ -25,7 +25,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify"); + 
arser.add_argument("tflite").help("TFLite file path to verify"); try { diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt index 948b1cecd..6205ac650 100644 --- a/compiler/tflchef/CMakeLists.txt +++ b/compiler/tflchef/CMakeLists.txt @@ -20,4 +20,9 @@ add_subdirectory(core) add_subdirectory(tflite) # Tools add_subdirectory(tools) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + add_subdirectory(tests) diff --git a/compiler/tflchef/core/src/Convert.cpp b/compiler/tflchef/core/src/Convert.cpp index 200c71eca..f4dd4b332 100644 --- a/compiler/tflchef/core/src/Convert.cpp +++ b/compiler/tflchef/core/src/Convert.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -62,6 +63,8 @@ tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value) { case tflchef::FLOAT32: return tflite::TensorType_FLOAT32; + case tflchef::FLOAT16: + return tflite::TensorType_FLOAT16; case tflchef::INT32: return tflite::TensorType_INT32; case tflchef::UINT8: @@ -164,3 +167,222 @@ as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb, throw std::runtime_error("Unknown SparseIndexVector type"); } + +// namespace sparsity code referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +namespace sparsity +{ + +template <typename T> +FormatConverter<T>::FormatConverter(const std::vector<int> &shape, + const std::vector<int> &traversal_order, + const std::vector<TfLiteDimensionType> &format, + const std::vector<int> &block_size, + const std::vector<int> &block_map) + : dense_shape_(shape), traversal_order_(traversal_order), block_size_(block_size), + block_map_(block_map) +{ + 
dense_size_ = 1; + int block_dim = 0; + blocked_shape_.resize(shape.size()); + format_.resize(shape.size() + block_map.size()); + for (int i = 0; i < shape.size(); i++) + { + format_[i] = format[traversal_order[i]]; + dense_size_ *= shape[i]; + if (block_dim < block_map.size() && block_map[block_dim] == i) + { + blocked_shape_[i] = shape[i] / block_size[block_dim]; + block_dim++; + } + else + { + blocked_shape_[i] = shape[i]; + } + } + + // Only dense blocks are supported. + for (int i = 0; i < block_map.size(); i++) + { + format_[i + shape.size()] = kTfLiteDimDense; + } +} + +template <typename T> bool FormatConverter<T>::DenseToSparse(const T *src_data) +{ + int num_original_dims = dense_shape_.size(); + int num_block_dims = block_map_.size(); + int num_expanded_dims = num_original_dims + num_block_dims; + std::vector<int> expanded_shape(num_expanded_dims); + for (int i = 0; i < num_expanded_dims; i++) + { + if (i < num_original_dims) + { + expanded_shape[i] = blocked_shape_[i]; + } + else + { + expanded_shape[i] = block_size_[i - num_original_dims]; + } + } + + std::vector<int> shape_offset(num_original_dims); + shape_offset[shape_offset.size() - 1] = 1; + for (int i = num_original_dims - 1; i > 0; --i) + { + shape_offset[i - 1] = shape_offset[i] * dense_shape_[i]; + } + + std::vector<int> expanded_shape_offset(num_expanded_dims); + for (int i = 0; i < num_original_dims; ++i) + { + expanded_shape_offset[i] = shape_offset[i]; + } + for (int i = 0; i < num_block_dims; ++i) + { + int mapped_dim = block_map_[i]; + expanded_shape_offset[num_original_dims + i] = shape_offset[mapped_dim]; + expanded_shape_offset[mapped_dim] *= block_size_[i]; + } + + std::vector<int> dst_ordered_offset(num_expanded_dims); + for (int i = 0; i < num_expanded_dims; ++i) + { + dst_ordered_offset[i] = expanded_shape_offset[traversal_order_[i]]; + } + + std::vector<bool> dst_dim_has_nonzeroes(num_expanded_dims); + std::fill(dst_dim_has_nonzeroes.begin(), dst_dim_has_nonzeroes.end(), false); 
+ std::vector<int> inner_compressed_dim(num_expanded_dims); + int most_recent_compressed_dim = -1; + std::vector<int> num_segments_of_next_compressed_dim(num_expanded_dims); + int segment_count = 1; + for (int i = num_expanded_dims - 1; i >= 0; --i) + { + inner_compressed_dim[i] = most_recent_compressed_dim; + if (format_[i] == kTfLiteDimSparseCSR) + { + most_recent_compressed_dim = i; + num_segments_of_next_compressed_dim[i] = segment_count; + segment_count = 1; + } + else + { + num_segments_of_next_compressed_dim[i] = -1; + segment_count *= expanded_shape[traversal_order_[i]]; + } + } + + dim_metadata_.resize(num_expanded_dims * 2); + std::vector<int> dst_sparse_dims; + dst_sparse_dims.reserve(num_expanded_dims); + for (int i = 0; i < num_expanded_dims; ++i) + { + dim_metadata_[i * 2].clear(); + dim_metadata_[i * 2 + 1].clear(); + if (format_[i] == kTfLiteDimDense) + { + // If dimension is dense, just store the shape. + dim_metadata_[i * 2].push_back(expanded_shape[traversal_order_[i]]); + } + else + { + dim_metadata_[i * 2].push_back(0); // Segment array always begins with 0. + dst_sparse_dims.push_back(i); // Add dimension to the sparse list. + } + } + + // This algorithm assumes that the block size is small enough for all the + // elements to fit in cache, so the strided accesses from different traversal + // order and the write-first-erase-later strategy shouldn't be too slow + int dst_dim_idx = num_expanded_dims; + std::vector<int> coordinate(num_expanded_dims, 0); + int dense_tensor_idx = 0; + while (dst_dim_idx >= 0) + { + if (dst_dim_idx == num_expanded_dims) + { + // We have a complete coordinate. Add the element to the value array if it + // is not zero, or if the last dimension is dense. + if (!IsZero(src_data[dense_tensor_idx])) + { + data_.push_back(src_data[dense_tensor_idx]); + // Mark all sparse dimensions that their current indices have nonzeroes. 
+ for (auto dst_dim : dst_sparse_dims) + { + if (!dst_dim_has_nonzeroes[dst_dim]) + { + // Only add the index to the indices array if the current nonzero + // is the first nonzero of the block. + dim_metadata_[2 * dst_dim + 1].push_back(coordinate[dst_dim]); + dst_dim_has_nonzeroes[dst_dim] = true; + } + } + } + else if (format_[num_expanded_dims - 1] == kTfLiteDimDense) + { + data_.push_back(src_data[dense_tensor_idx]); + } + --dst_dim_idx; + } + else + { + int original_dim_idx = traversal_order_[dst_dim_idx]; + int dim_size = expanded_shape[original_dim_idx]; + if (dst_dim_has_nonzeroes[dst_dim_idx]) + { + // If the previous block has nonzeroes, reset the flag to false since + // we have just moved to a new block. + dst_dim_has_nonzeroes[dst_dim_idx] = false; + } + else if (format_[dst_dim_idx] == kTfLiteDimSparseCSR) + { + // This block is empty. Delete unnecessary values if compressed. + int next_compressed_dim = inner_compressed_dim[dst_dim_idx]; + int erase_offset = dim_metadata_[2 * dst_dim_idx + 1].size() * + num_segments_of_next_compressed_dim[dst_dim_idx]; + if (next_compressed_dim >= 0) + { + auto &segments = dim_metadata_[2 * inner_compressed_dim[dst_dim_idx]]; + segments.erase(segments.begin() + 1 + erase_offset, segments.end()); + } + else + { + data_.erase(data_.begin() + erase_offset, data_.end()); + } + } + if (++coordinate[dst_dim_idx] < dim_size) + { + // The current dst_dim_idx is valid (not out of bound). + dense_tensor_idx += dst_ordered_offset[dst_dim_idx]; + ++dst_dim_idx; + } + else + { + // dst_dim_idx has reached its dim size. Update segment array and go + // back to incrementing the previous dimension (dst_dim_idx - 1). 
+ if (format_[dst_dim_idx] == kTfLiteDimSparseCSR) + { + dim_metadata_[2 * dst_dim_idx].push_back(dim_metadata_[2 * dst_dim_idx + 1].size()); + } + coordinate[dst_dim_idx] = -1; + dense_tensor_idx -= dst_ordered_offset[dst_dim_idx] * dim_size; + --dst_dim_idx; + } + } + } + + return true; +} + +template <typename T> bool FormatConverter<T>::IsZero(const T val) +{ + return (val == static_cast<T>(0)); +} + +template class FormatConverter<float>; +template class FormatConverter<uint16_t>; // float16 + +} // namespace sparsity diff --git a/compiler/tflchef/core/src/Convert.h b/compiler/tflchef/core/src/Convert.h index 45c93d229..6e910ea2c 100644 --- a/compiler/tflchef/core/src/Convert.h +++ b/compiler/tflchef/core/src/Convert.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2020 The TensorFlow Authors. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,4 +35,52 @@ flatbuffers::Offset<void> as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb, const ::tflchef::TensorSparsity_IndexVec &value); +// codes under namespace sparsity referenced from +// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/ +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h +// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc + +namespace sparsity +{ + +// Storage format of each dimension in a sparse tensor. 
+typedef enum TfLiteDimensionType +{ + kTfLiteDimDense = 0, + kTfLiteDimSparseCSR, +} TfLiteDimensionType; + +template <typename T> class FormatConverter +{ +public: + FormatConverter(const std::vector<int32_t> &shape, const std::vector<int32_t> &traversal_order, + const std::vector<TfLiteDimensionType> &format, + const std::vector<int32_t> &block_size = {}, + const std::vector<int32_t> &block_map = {}); + + bool DenseToSparse(const T *src_data); + + const std::vector<T> &GetData() { return data_; } + const std::vector<std::vector<int32_t>> &GetDimMetadata() { return dim_metadata_; } + +private: + bool IsZero(const T val); + +private: + std::vector<int32_t> dense_shape_; + std::vector<int32_t> blocked_shape_; + size_t dense_size_; + std::vector<int32_t> traversal_order_; + std::vector<TfLiteDimensionType> format_; + std::vector<int32_t> block_size_; + std::vector<int32_t> block_map_; + std::vector<std::vector<int32_t>> dim_metadata_; + std::vector<T> data_; +}; + +extern template class FormatConverter<float>; +extern template class FormatConverter<uint16_t>; // float16 + +} // namespace sparsity + #endif // __CONVERT_H__ diff --git a/compiler/tflchef/core/src/DataChef.def b/compiler/tflchef/core/src/DataChef.def index c634c047e..28a5b7617 100644 --- a/compiler/tflchef/core/src/DataChef.def +++ b/compiler/tflchef/core/src/DataChef.def @@ -21,3 +21,7 @@ DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory) DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory) DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory) DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory) + +// FLOAT16 support for only gaussian, explicit for now +DATA_CHEF(FLOAT16, explicit, ExplicitFloat16DataChefFactory) +DATA_CHEF(FLOAT16, gaussian, GaussianFloat16DataChefFactory) diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp index 93b9334a6..a788adc02 100644 --- a/compiler/tflchef/core/src/ModelChef.cpp +++ 
b/compiler/tflchef/core/src/ModelChef.cpp @@ -92,6 +92,7 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type) static DataChefRegistry string; static DataChefRegistry boolean; static DataChefRegistry s16; + static DataChefRegistry fp16; switch (type) { @@ -101,6 +102,8 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type) return s64; case tflchef::FLOAT32: return fp32; + case tflchef::FLOAT16: + return fp16; case tflchef::UINT8: return u8; case tflchef::STRING: @@ -207,6 +210,41 @@ struct CookParams std::string noname; }; +std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> +make_dim_metadata_vec(flatbuffers::FlatBufferBuilder *flatbuffer_builder, int32_t dims_count, + const std::vector<int> &traversal_order_vec, + const std::vector<sparsity::TfLiteDimensionType> &format_vec, + const std::vector<std::vector<int32_t>> &dim_metadata_src) +{ + // Build sparsity parameter. + std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> dim_metadata_vec(dims_count); + for (int32_t i = 0; i < dims_count; i++) + { + const int32_t metadata_idx = 2 * i; + if (format_vec[traversal_order_vec[i]] == sparsity::kTfLiteDimSparseCSR) + { + auto array_segments = + tflite::CreateInt32Vector(*flatbuffer_builder, + flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx])) + .Union(); + auto array_indices = + tflite::CreateInt32Vector( + *flatbuffer_builder, flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx + 1])) + .Union(); + dim_metadata_vec[i] = + tflite::CreateDimensionMetadata(*flatbuffer_builder, tflite::DimensionType_SPARSE_CSR, 0, + tflite::SparseIndexVector_Int32Vector, array_segments, + tflite::SparseIndexVector_Int32Vector, array_indices); + } + else + { + dim_metadata_vec[i] = tflite::CreateDimensionMetadata( + *flatbuffer_builder, tflite::DimensionType_DENSE, dim_metadata_src[metadata_idx][0]); + } + } + return dim_metadata_vec; +} + template <typename T> std::map<std::string, int32_t> cook_graph(const T 
&graph, CookParams &cp) { LOGGER(l); @@ -271,6 +309,8 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, assert(operand.has_type()); + flatbuffers::Offset<tflite::SparsityParameters> sparsity_index; + flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape; std::vector<int32_t> dims; if (operand.has_shape()) @@ -298,16 +338,125 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, // Create Data int32_t count = (element_count(dims) > 0) ? element_count(dims) : filler.arg_size(); auto data_vec = chef->generate(count); - auto data = flatbuffer_builder->CreateVector(data_vec); - // Create Buffer - tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; - buffer_builder.add_data(data); - auto buffer = buffer_builder.Finish(); + if (operand.has_make_sparse() && operand.make_sparse()) + { + assert(not operand.has_sparsity()); + assert(operand.has_shape()); + + const int32_t dims_count = dims.size(); + std::vector<int> traversal_order_vec; + std::vector<sparsity::TfLiteDimensionType> format_vec; + for (int32_t o = 0; o < dims_count; ++o) + traversal_order_vec.push_back(o); + for (int32_t o = 0; o < dims_count - 1; ++o) + format_vec.push_back(sparsity::kTfLiteDimDense); + format_vec.push_back(sparsity::kTfLiteDimSparseCSR); + + if (operand.type() == tflchef::FLOAT32) + { + ::sparsity::FormatConverter<float> converter(dims, traversal_order_vec, format_vec); + converter.DenseToSparse(reinterpret_cast<const float *>(data_vec.data())); + const auto &sparse_data = converter.GetData(); + + std::vector<uint8_t> sparse_uint8; + for (int c = 0; c < sparse_data.size(); ++c) + { + const float value = sparse_data.at(c); + const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value); + for (uint32_t b = 0; b < sizeof(float); ++b) + { + sparse_uint8.emplace_back(arr[b]); + } + } + auto data = flatbuffer_builder->CreateVector(sparse_uint8); + + // Create Buffer + tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; + 
buffer_builder.add_data(data); + auto buffer = buffer_builder.Finish(); + + // Update Buffer Index & Vector + buffer_index = buffer_vec.size(); + buffer_vec.emplace_back(buffer); + + // save SparsityParameters + auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec); + + // Create block map + std::vector<int> block_map_vec{}; + auto block_map = flatbuffer_builder->CreateVector(block_map_vec); + + // Create dimension metadata + const auto &dim_metadata_src = converter.GetDimMetadata(); + auto dim_metadata_vec = + make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec, + format_vec, dim_metadata_src); + auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec); + sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order, + block_map, dim_metadata); + } + else if (operand.type() == tflchef::FLOAT16) + { + ::sparsity::FormatConverter<uint16_t> converter(dims, traversal_order_vec, format_vec); + converter.DenseToSparse(reinterpret_cast<const uint16_t *>(data_vec.data())); + const auto &sparse_data = converter.GetData(); + + std::vector<uint8_t> sparse_uint8; + for (int c = 0; c < sparse_data.size(); ++c) + { + const uint16_t value = sparse_data.at(c); + const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value); + for (uint32_t b = 0; b < sizeof(uint16_t); ++b) + { + sparse_uint8.emplace_back(arr[b]); + } + } + auto data = flatbuffer_builder->CreateVector(sparse_uint8); + + // Create Buffer + tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; + buffer_builder.add_data(data); + auto buffer = buffer_builder.Finish(); + + // Update Buffer Index & Vector + buffer_index = buffer_vec.size(); + buffer_vec.emplace_back(buffer); + + // save SparsityParameters + auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec); + + // Create block map + std::vector<int> block_map_vec{}; + auto block_map = flatbuffer_builder->CreateVector(block_map_vec); + + // Create 
dimension metadata + const auto &dim_metadata_src = converter.GetDimMetadata(); + auto dim_metadata_vec = + make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec, + format_vec, dim_metadata_src); + auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec); + sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order, + block_map, dim_metadata); + } + else + { + throw std::runtime_error{"NYI: unsupported operand type"}; + } + } + else + { + auto data = flatbuffer_builder->CreateVector(data_vec); + + // Create Buffer + tflite::BufferBuilder buffer_builder{*flatbuffer_builder}; + buffer_builder.add_data(data); + auto buffer = buffer_builder.Finish(); - // Update Buffer Index & Vector - buffer_index = buffer_vec.size(); - buffer_vec.emplace_back(buffer); + // Update Buffer Index & Vector + buffer_index = buffer_vec.size(); + buffer_vec.emplace_back(buffer); + } } else { @@ -384,8 +533,6 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, quant_index = quant_builder.Finish(); } - flatbuffers::Offset<tflite::SparsityParameters> sparsity_index; - if (operand.has_sparsity()) { const auto &sparsity = operand.sparsity(); diff --git a/compiler/tflchef/core/src/Op/Densify.cpp b/compiler/tflchef/core/src/Op/Densify.cpp new file mode 100644 index 000000000..63c4e207a --- /dev/null +++ b/compiler/tflchef/core/src/Op/Densify.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Densify.h" + +flatbuffers::Offset<void> DensifyChef::value(flatbuffers::FlatBufferBuilder &fbb) const +{ + tflite::DensifyOptionsBuilder options_builder{fbb}; + + return options_builder.Finish().Union(); +} + +std::unique_ptr<OpChef> DensifyChefFactory::create(const tflchef::Operation *operation) const +{ + return std::unique_ptr<OpChef>{new DensifyChef{operation}}; +} diff --git a/compiler/tflchef/core/src/Op/Densify.h b/compiler/tflchef/core/src/Op/Densify.h new file mode 100644 index 000000000..f6af693d9 --- /dev/null +++ b/compiler/tflchef/core/src/Op/Densify.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __OP_DENSIFY_H__ +#define __OP_DENSIFY_H__ + +#include "OpChef.h" + +class DensifyChef final : public OpChef +{ +public: + explicit DensifyChef(const tflchef::Operation *operation) : _operation{operation} + { + // DO NOTHING + } + +public: + tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DENSIFY; } + + tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_DensifyOptions; } + + flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override; + +private: + const tflchef::Operation *_operation; +}; + +struct DensifyChefFactory final : public OpChefFactory +{ + std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override; +}; + +#endif // __OP_DENSIFY_H__ diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def index beebd359f..c19d00dfb 100644 --- a/compiler/tflchef/core/src/OpChef.def +++ b/compiler/tflchef/core/src/OpChef.def @@ -18,6 +18,7 @@ OP_CHEF(Ceil, CeilChefFactory) OP_CHEF(Concatenation, ConcatenationChefFactory) OP_CHEF(Conv2D, Conv2DChefFactory) OP_CHEF(Cos, CosChefFactory) +OP_CHEF(Densify, DensifyChefFactory) OP_CHEF(DepthToSpace, DepthToSpaceChefFactory) OP_CHEF(DepthwiseConv2D, DepthwiseConv2DChefFactory) OP_CHEF(Dequantize, DequantizeChefFactory) diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h index 159019abf..3cd3be558 100644 --- a/compiler/tflchef/core/src/OpChefs.h +++ b/compiler/tflchef/core/src/OpChefs.h @@ -31,6 +31,7 @@ #include "Op/Concatenation.h" #include "Op/Conv2D.h" #include "Op/Cos.h" +#include "Op/Densify.h" #include "Op/DepthToSpace.h" #include "Op/DepthwiseConv2D.h" #include "Op/Dequantize.h" diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto index 1abefafe1..da4b6920d 100644 --- a/compiler/tflchef/proto/tflchef.proto +++ b/compiler/tflchef/proto/tflchef.proto @@ -15,6 +15,7 @@ package tflchef; // This enum 
value corresponds to TensorType in TensorFlow Lite schema enum TensorType { FLOAT32 = 0; + FLOAT16 = 1; INT32 = 2; UINT8 = 3; INT64 = 4; @@ -88,6 +89,12 @@ message Operand { optional TensorSparsity sparsity = 6; optional bool is_variable = 7 [default = false]; optional ShapeSignature shape_signature = 8; + // 'make_sparse' is to tell tflchef to make a sparse tensor + // as filling 'TensorSparsity' by hand can be difficult + // for now, last dimension will be SPARSE_CSR + // ex) shape [2, 3, 4] will have + // TraversalOrder [0, 1, 2] with [DENSE, DENSE, SPARSE_CSR] + optional bool make_sparse = 9 [default = false]; } // This enum value corresponds to Padding in TensorFlow Lite schema @@ -534,6 +541,10 @@ message FakeQuantOptions { optional bool narrow_range = 4 [default = false]; } +message DensifyOptions { + // NONE +} + message Operation { optional string type = 1; repeated string input = 2; @@ -650,6 +661,7 @@ message Operation { optional AddNOptions add_n_options = 207; optional MatMulOptions matmul_options = 208; optional MaxPoolWithArgmaxOptions max_pool_with_argmax_options = 209; + optional DensifyOptions densify_options = 210; // NOTE if there are more than two options with same type of Options // use the number not listed in the above reserve list } diff --git a/compiler/tflchef/tests/make_sparse/test.recipe b/compiler/tflchef/tests/make_sparse/test.recipe new file mode 100644 index 000000000..15cc93a5d --- /dev/null +++ b/compiler/tflchef/tests/make_sparse/test.recipe @@ -0,0 +1,44 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse" + type: FLOAT32 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "2" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "3" + } + make_sparse: true +} +operand { + name: "dense" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 4 
dim: 4 } +} +operation { + type: "Densify" + input: "sparse" + output: "dense" +} +operation { + type: "Add" + input: "in" + input: "dense" + output: "out" + add_options { + activation: NONE + } +} +input: "in" +output: "out" diff --git a/compiler/tflchef/tests/make_sparse_f16/test.recipe b/compiler/tflchef/tests/make_sparse_f16/test.recipe new file mode 100644 index 000000000..5977a1d32 --- /dev/null +++ b/compiler/tflchef/tests/make_sparse_f16/test.recipe @@ -0,0 +1,54 @@ +operand { + name: "in" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "sparse16" + type: FLOAT16 + shape { dim: 4 dim: 4 } + filler { + tag: "explicit" + arg: "2" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "0" + arg: "0" arg: "0" arg: "0" arg: "3" + } + make_sparse: true +} +operand { + name: "dense16" + type: FLOAT16 + shape { dim: 4 dim: 4 } +} +operand { + name: "dense32" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operand { + name: "out" + type: FLOAT32 + shape { dim: 4 dim: 4 } +} +operation { + type: "Densify" + input: "sparse16" + output: "dense16" +} +operation { + type: "Dequantize" + input: "dense16" + output: "dense32" +} +operation { + type: "Add" + input: "in" + input: "dense32" + output: "out" + add_options { + activation: NONE + } +} +input: "in" +output: "out" diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt index 3c3352b0a..d9a20a2e1 100644 --- a/compiler/tflchef/tflite/CMakeLists.txt +++ b/compiler/tflchef/tflite/CMakeLists.txt @@ -3,6 +3,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_library(tflchef_tflite STATIC ${SOURCES}) target_include_directories(tflchef_tflite PUBLIC include) target_include_directories(tflchef_tflite PRIVATE src) +target_include_directories(tflchef_tflite PRIVATE src/Op/include) target_link_libraries(tflchef_tflite tflchef_proto) target_link_libraries(tflchef_tflite mio_tflite280) target_link_libraries(tflchef_tflite 
mio_tflite280_helper) diff --git a/compiler/tflchef/tflite/src/Convert.cpp b/compiler/tflchef/tflite/src/Convert.cpp index f47e51d3d..242987661 100644 --- a/compiler/tflchef/tflite/src/Convert.cpp +++ b/compiler/tflchef/tflite/src/Convert.cpp @@ -35,8 +35,9 @@ tflchef::TensorType as_tflchef_type(const tflite::TensorType type) return tflchef::BOOL; case tflite::TensorType_INT16: return tflchef::INT16; + case tflite::TensorType_FLOAT16: + return tflchef::FLOAT16; // TODO handle other types - // TensorType_FLOAT16 // TensorType_STRING // TensorType_COMPLEX64 default: diff --git a/compiler/tflchef/tflite/src/FillerHelper.cpp b/compiler/tflchef/tflite/src/FillerHelper.cpp index cf96d2e8c..1ac99ad40 100644 --- a/compiler/tflchef/tflite/src/FillerHelper.cpp +++ b/compiler/tflchef/tflite/src/FillerHelper.cpp @@ -48,3 +48,18 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import) } } // namespace tflchef + +// helpers of common codes for filling inputs +namespace tflchef +{ + +void fill_two_inputs(const tflite::Operator *op, TFliteImport *import) +{ + const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); + assert(inputs.size() == 2); + + fill_tensor_to_import(inputs[0], import); + fill_tensor_to_import(inputs[1], import); +} + +} // namespace tflchef diff --git a/compiler/tflchef/tflite/src/FillerHelper.h b/compiler/tflchef/tflite/src/FillerHelper.h index 053a5c18a..e96ae73d0 100644 --- a/compiler/tflchef/tflite/src/FillerHelper.h +++ b/compiler/tflchef/tflite/src/FillerHelper.h @@ -28,4 +28,12 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import); } // namespace tflchef +// helpers of common codes for filling inputs +namespace tflchef +{ + +void fill_two_inputs(const tflite::Operator *op, TFliteImport *import); + +} // namespace tflchef + #endif // __FILLER_HELPER_H__ diff --git a/compiler/tflchef/tflite/src/Op/Add.cpp b/compiler/tflchef/tflite/src/Op/Add.cpp index 3e880a63b..23d360616 100644 --- a/compiler/tflchef/tflite/src/Op/Add.cpp 
+++ b/compiler/tflchef/tflite/src/Op/Add.cpp @@ -27,11 +27,7 @@ void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import, { // Add may have constant input - const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpAdd::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Maximum.cpp b/compiler/tflchef/tflite/src/Op/Maximum.cpp index d52caf0c2..65e4c2c99 100644 --- a/compiler/tflchef/tflite/src/Op/Maximum.cpp +++ b/compiler/tflchef/tflite/src/Op/Maximum.cpp @@ -25,11 +25,7 @@ namespace tflchef void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { - const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Minimum.cpp b/compiler/tflchef/tflite/src/Op/Minimum.cpp index 6440f1deb..b4d255ce3 100644 --- a/compiler/tflchef/tflite/src/Op/Minimum.cpp +++ b/compiler/tflchef/tflite/src/Op/Minimum.cpp @@ -25,11 +25,7 @@ namespace tflchef void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { - const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Mul.cpp b/compiler/tflchef/tflite/src/Op/Mul.cpp 
index 9faa4acaf..1145ff7e6 100644 --- a/compiler/tflchef/tflite/src/Op/Mul.cpp +++ b/compiler/tflchef/tflite/src/Op/Mul.cpp @@ -27,11 +27,7 @@ void TFliteOpMul::filler(const tflite::Operator *op, TFliteImport *import, { // Mul may have constant input - const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpMul::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp index ad9921970..4f096ced4 100644 --- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp +++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp @@ -38,7 +38,7 @@ void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImpor for (int32_t index = 2; index < 5; ++index) { - fill_tensor_to_import(index, import); + fill_tensor_to_import(inputs[index], import); } } diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp index db7f4c932..332cba0ff 100644 --- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp +++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp @@ -41,7 +41,7 @@ void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImpor for (int32_t index = 2; index < 6; ++index) { - fill_tensor_to_import(index, import); + fill_tensor_to_import(inputs[index], import); } } diff --git a/compiler/tflchef/tflite/src/Op/PadV2.cpp b/compiler/tflchef/tflite/src/Op/PadV2.cpp index 0b1c9f3b2..a6b657f59 100644 --- a/compiler/tflchef/tflite/src/Op/PadV2.cpp +++ b/compiler/tflchef/tflite/src/Op/PadV2.cpp @@ -16,6 +16,7 @@ #include "PadV2.h" +#include "Convert.h" #include "FillerHelper.h" namespace tflchef @@ -24,9 +25,11 @@ namespace tflchef void TFliteOpPadV2::filler(const 
tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { + const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); + // Filler for paddings and constant_values - fill_tensor_to_import(1, import); - fill_tensor_to_import(2, import); + fill_tensor_to_import(inputs[1], import); + fill_tensor_to_import(inputs[2], import); } tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp index 548a09a67..ec09a69a4 100644 --- a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp +++ b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp @@ -25,9 +25,11 @@ namespace tflchef void TFliteOpScatterNd::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { + const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); + // Filler for indices and shape - fill_tensor_to_import(0, import); - fill_tensor_to_import(2, import); + fill_tensor_to_import(inputs[0], import); + fill_tensor_to_import(inputs[2], import); } tflchef::Operation *TFliteOpScatterNd::build(const tflite::Operator *, TFliteImport *, diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp index a975ca4b3..bc45a94e0 100644 --- a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp +++ b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp @@ -16,6 +16,7 @@ #include "SegmentSum.h" +#include "Convert.h" #include "FillerHelper.h" namespace tflchef @@ -24,8 +25,10 @@ namespace tflchef void TFliteOpSegmentSum::filler(const tflite::Operator *op, TFliteImport *import, tflchef::ModelRecipe *model_recipe) const { - // Filler for indices and shape - fill_tensor_to_import(1, import); + const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); + + // Filler for segment_ids + fill_tensor_to_import(inputs[1], import); } tflchef::Operation *TFliteOpSegmentSum::build(const 
tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Sub.cpp b/compiler/tflchef/tflite/src/Op/Sub.cpp index 0a08bbfdf..584be0ab9 100644 --- a/compiler/tflchef/tflite/src/Op/Sub.cpp +++ b/compiler/tflchef/tflite/src/Op/Sub.cpp @@ -27,11 +27,7 @@ void TFliteOpSub::filler(const tflite::Operator *op, TFliteImport *import, { // Sub may have constant input - const std::vector<int32_t> &inputs = as_index_vector(op->inputs()); - assert(inputs.size() == 2); - - fill_tensor_to_import(inputs[0], import); - fill_tensor_to_import(inputs[1], import); + fill_two_inputs(op, import); } tflchef::Operation *TFliteOpSub::build(const tflite::Operator *op, TFliteImport *import, diff --git a/compiler/tflchef/tflite/src/Op/Abs.h b/compiler/tflchef/tflite/src/Op/include/Abs.h index d99b0d593..d99b0d593 100644 --- a/compiler/tflchef/tflite/src/Op/Abs.h +++ b/compiler/tflchef/tflite/src/Op/include/Abs.h diff --git a/compiler/tflchef/tflite/src/Op/Add.h b/compiler/tflchef/tflite/src/Op/include/Add.h index 49d945f8b..49d945f8b 100644 --- a/compiler/tflchef/tflite/src/Op/Add.h +++ b/compiler/tflchef/tflite/src/Op/include/Add.h diff --git a/compiler/tflchef/tflite/src/Op/AddN.h b/compiler/tflchef/tflite/src/Op/include/AddN.h index 4387aa06a..4387aa06a 100644 --- a/compiler/tflchef/tflite/src/Op/AddN.h +++ b/compiler/tflchef/tflite/src/Op/include/AddN.h diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.h b/compiler/tflchef/tflite/src/Op/include/ArgMax.h index 30068ecf2..30068ecf2 100644 --- a/compiler/tflchef/tflite/src/Op/ArgMax.h +++ b/compiler/tflchef/tflite/src/Op/include/ArgMax.h diff --git a/compiler/tflchef/tflite/src/Op/ArgMin.h b/compiler/tflchef/tflite/src/Op/include/ArgMin.h index 83c643c1a..83c643c1a 100644 --- a/compiler/tflchef/tflite/src/Op/ArgMin.h +++ b/compiler/tflchef/tflite/src/Op/include/ArgMin.h diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.h b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h index f9e9fb254..f9e9fb254 
100644 --- a/compiler/tflchef/tflite/src/Op/AveragePool2D.h +++ b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h diff --git a/compiler/tflchef/tflite/src/Op/BatchMatMul.h b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h index 6eb4c6e68..6eb4c6e68 100644 --- a/compiler/tflchef/tflite/src/Op/BatchMatMul.h +++ b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h index ae2114c97..ae2114c97 100644 --- a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h +++ b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h diff --git a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h index 333f542ac..333f542ac 100644 --- a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h +++ b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h diff --git a/compiler/tflchef/tflite/src/Op/Cast.h b/compiler/tflchef/tflite/src/Op/include/Cast.h index 29c126c93..29c126c93 100644 --- a/compiler/tflchef/tflite/src/Op/Cast.h +++ b/compiler/tflchef/tflite/src/Op/include/Cast.h diff --git a/compiler/tflchef/tflite/src/Op/Ceil.h b/compiler/tflchef/tflite/src/Op/include/Ceil.h index 44df20778..44df20778 100644 --- a/compiler/tflchef/tflite/src/Op/Ceil.h +++ b/compiler/tflchef/tflite/src/Op/include/Ceil.h diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.h b/compiler/tflchef/tflite/src/Op/include/Concatenation.h index 4a7ea5791..4a7ea5791 100644 --- a/compiler/tflchef/tflite/src/Op/Concatenation.h +++ b/compiler/tflchef/tflite/src/Op/include/Concatenation.h diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.h b/compiler/tflchef/tflite/src/Op/include/Conv2D.h index 0216e9ce9..0216e9ce9 100644 --- a/compiler/tflchef/tflite/src/Op/Conv2D.h +++ b/compiler/tflchef/tflite/src/Op/include/Conv2D.h diff --git a/compiler/tflchef/tflite/src/Op/Cos.h 
b/compiler/tflchef/tflite/src/Op/include/Cos.h index 8f3dbe3a6..8f3dbe3a6 100644 --- a/compiler/tflchef/tflite/src/Op/Cos.h +++ b/compiler/tflchef/tflite/src/Op/include/Cos.h diff --git a/compiler/tflchef/tflite/src/Op/DepthToSpace.h b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h index b5852ac89..b5852ac89 100644 --- a/compiler/tflchef/tflite/src/Op/DepthToSpace.h +++ b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h index c172536b4..c172536b4 100644 --- a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h +++ b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h diff --git a/compiler/tflchef/tflite/src/Op/Dequantize.h b/compiler/tflchef/tflite/src/Op/include/Dequantize.h index df1c7bbdb..df1c7bbdb 100644 --- a/compiler/tflchef/tflite/src/Op/Dequantize.h +++ b/compiler/tflchef/tflite/src/Op/include/Dequantize.h diff --git a/compiler/tflchef/tflite/src/Op/Div.h b/compiler/tflchef/tflite/src/Op/include/Div.h index 254a4cd99..254a4cd99 100644 --- a/compiler/tflchef/tflite/src/Op/Div.h +++ b/compiler/tflchef/tflite/src/Op/include/Div.h diff --git a/compiler/tflchef/tflite/src/Op/ELU.h b/compiler/tflchef/tflite/src/Op/include/ELU.h index 490c9fde4..490c9fde4 100644 --- a/compiler/tflchef/tflite/src/Op/ELU.h +++ b/compiler/tflchef/tflite/src/Op/include/ELU.h diff --git a/compiler/tflchef/tflite/src/Op/Equal.h b/compiler/tflchef/tflite/src/Op/include/Equal.h index fd4b40001..fd4b40001 100644 --- a/compiler/tflchef/tflite/src/Op/Equal.h +++ b/compiler/tflchef/tflite/src/Op/include/Equal.h diff --git a/compiler/tflchef/tflite/src/Op/Exp.h b/compiler/tflchef/tflite/src/Op/include/Exp.h index 5ff3ddc8b..5ff3ddc8b 100644 --- a/compiler/tflchef/tflite/src/Op/Exp.h +++ b/compiler/tflchef/tflite/src/Op/include/Exp.h diff --git a/compiler/tflchef/tflite/src/Op/ExpandDims.h b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h index 
e2f3e4e50..e2f3e4e50 100644 --- a/compiler/tflchef/tflite/src/Op/ExpandDims.h +++ b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h diff --git a/compiler/tflchef/tflite/src/Op/FakeQuant.h b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h index f36e615df..f36e615df 100644 --- a/compiler/tflchef/tflite/src/Op/FakeQuant.h +++ b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h diff --git a/compiler/tflchef/tflite/src/Op/Fill.h b/compiler/tflchef/tflite/src/Op/include/Fill.h index 4f46f628a..4f46f628a 100644 --- a/compiler/tflchef/tflite/src/Op/Fill.h +++ b/compiler/tflchef/tflite/src/Op/include/Fill.h diff --git a/compiler/tflchef/tflite/src/Op/Floor.h b/compiler/tflchef/tflite/src/Op/include/Floor.h index f0f8ef38a..f0f8ef38a 100644 --- a/compiler/tflchef/tflite/src/Op/Floor.h +++ b/compiler/tflchef/tflite/src/Op/include/Floor.h diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.h b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h index 5d049a668..5d049a668 100644 --- a/compiler/tflchef/tflite/src/Op/FloorDiv.h +++ b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h diff --git a/compiler/tflchef/tflite/src/Op/FloorMod.h b/compiler/tflchef/tflite/src/Op/include/FloorMod.h index f36dfe813..f36dfe813 100644 --- a/compiler/tflchef/tflite/src/Op/FloorMod.h +++ b/compiler/tflchef/tflite/src/Op/include/FloorMod.h diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.h b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h index 8fbe1f3ed..8fbe1f3ed 100644 --- a/compiler/tflchef/tflite/src/Op/FullyConnected.h +++ b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h diff --git a/compiler/tflchef/tflite/src/Op/Gather.h b/compiler/tflchef/tflite/src/Op/include/Gather.h index e01276b76..e01276b76 100644 --- a/compiler/tflchef/tflite/src/Op/Gather.h +++ b/compiler/tflchef/tflite/src/Op/include/Gather.h diff --git a/compiler/tflchef/tflite/src/Op/GatherNd.h b/compiler/tflchef/tflite/src/Op/include/GatherNd.h index 112f23d33..112f23d33 100644 --- 
a/compiler/tflchef/tflite/src/Op/GatherNd.h +++ b/compiler/tflchef/tflite/src/Op/include/GatherNd.h diff --git a/compiler/tflchef/tflite/src/Op/Greater.h b/compiler/tflchef/tflite/src/Op/include/Greater.h index 3ab2d1a4e..3ab2d1a4e 100644 --- a/compiler/tflchef/tflite/src/Op/Greater.h +++ b/compiler/tflchef/tflite/src/Op/include/Greater.h diff --git a/compiler/tflchef/tflite/src/Op/GreaterEqual.h b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h index 96b0af78a..96b0af78a 100644 --- a/compiler/tflchef/tflite/src/Op/GreaterEqual.h +++ b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h diff --git a/compiler/tflchef/tflite/src/Op/L2Normalize.h b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h index a73eae6c8..a73eae6c8 100644 --- a/compiler/tflchef/tflite/src/Op/L2Normalize.h +++ b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h diff --git a/compiler/tflchef/tflite/src/Op/L2Pool2D.h b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h index 046353440..046353440 100644 --- a/compiler/tflchef/tflite/src/Op/L2Pool2D.h +++ b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h diff --git a/compiler/tflchef/tflite/src/Op/LeakyRelu.h b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h index 28e63e0ca..28e63e0ca 100644 --- a/compiler/tflchef/tflite/src/Op/LeakyRelu.h +++ b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h diff --git a/compiler/tflchef/tflite/src/Op/Less.h b/compiler/tflchef/tflite/src/Op/include/Less.h index 1316cb613..1316cb613 100644 --- a/compiler/tflchef/tflite/src/Op/Less.h +++ b/compiler/tflchef/tflite/src/Op/include/Less.h diff --git a/compiler/tflchef/tflite/src/Op/LessEqual.h b/compiler/tflchef/tflite/src/Op/include/LessEqual.h index 81c710fbc..81c710fbc 100644 --- a/compiler/tflchef/tflite/src/Op/LessEqual.h +++ b/compiler/tflchef/tflite/src/Op/include/LessEqual.h diff --git a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h index 
c0eb3f2b1..c0eb3f2b1 100644 --- a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h +++ b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h diff --git a/compiler/tflchef/tflite/src/Op/Log.h b/compiler/tflchef/tflite/src/Op/include/Log.h index 9d17e2f81..9d17e2f81 100644 --- a/compiler/tflchef/tflite/src/Op/Log.h +++ b/compiler/tflchef/tflite/src/Op/include/Log.h diff --git a/compiler/tflchef/tflite/src/Op/LogSoftmax.h b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h index efd81f3e9..efd81f3e9 100644 --- a/compiler/tflchef/tflite/src/Op/LogSoftmax.h +++ b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h diff --git a/compiler/tflchef/tflite/src/Op/LogicalAnd.h b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h index 1f7a964b9..1f7a964b9 100644 --- a/compiler/tflchef/tflite/src/Op/LogicalAnd.h +++ b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.h b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h index b75d33554..b75d33554 100644 --- a/compiler/tflchef/tflite/src/Op/LogicalNot.h +++ b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.h b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h index 5331a0d65..5331a0d65 100644 --- a/compiler/tflchef/tflite/src/Op/LogicalOr.h +++ b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h diff --git a/compiler/tflchef/tflite/src/Op/Logistic.h b/compiler/tflchef/tflite/src/Op/include/Logistic.h index a75bf490e..a75bf490e 100644 --- a/compiler/tflchef/tflite/src/Op/Logistic.h +++ b/compiler/tflchef/tflite/src/Op/include/Logistic.h diff --git a/compiler/tflchef/tflite/src/Op/MatrixDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h index 4074f2c36..4074f2c36 100644 --- a/compiler/tflchef/tflite/src/Op/MatrixDiag.h +++ b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h diff --git a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h 
b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h index 0e7ec7f32..0e7ec7f32 100644 --- a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h +++ b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h diff --git a/compiler/tflchef/tflite/src/Op/MaxPool2D.h b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h index 36533f80c..36533f80c 100644 --- a/compiler/tflchef/tflite/src/Op/MaxPool2D.h +++ b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h diff --git a/compiler/tflchef/tflite/src/Op/Maximum.h b/compiler/tflchef/tflite/src/Op/include/Maximum.h index acafec343..acafec343 100644 --- a/compiler/tflchef/tflite/src/Op/Maximum.h +++ b/compiler/tflchef/tflite/src/Op/include/Maximum.h diff --git a/compiler/tflchef/tflite/src/Op/Mean.h b/compiler/tflchef/tflite/src/Op/include/Mean.h index 532c40c66..532c40c66 100644 --- a/compiler/tflchef/tflite/src/Op/Mean.h +++ b/compiler/tflchef/tflite/src/Op/include/Mean.h diff --git a/compiler/tflchef/tflite/src/Op/Minimum.h b/compiler/tflchef/tflite/src/Op/include/Minimum.h index 5db5b7940..5db5b7940 100644 --- a/compiler/tflchef/tflite/src/Op/Minimum.h +++ b/compiler/tflchef/tflite/src/Op/include/Minimum.h diff --git a/compiler/tflchef/tflite/src/Op/MirrorPad.h b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h index c9acdd498..c9acdd498 100644 --- a/compiler/tflchef/tflite/src/Op/MirrorPad.h +++ b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h diff --git a/compiler/tflchef/tflite/src/Op/Mul.h b/compiler/tflchef/tflite/src/Op/include/Mul.h index fd009d2fd..fd009d2fd 100644 --- a/compiler/tflchef/tflite/src/Op/Mul.h +++ b/compiler/tflchef/tflite/src/Op/include/Mul.h diff --git a/compiler/tflchef/tflite/src/Op/Neg.h b/compiler/tflchef/tflite/src/Op/include/Neg.h index c77ab7e84..c77ab7e84 100644 --- a/compiler/tflchef/tflite/src/Op/Neg.h +++ b/compiler/tflchef/tflite/src/Op/include/Neg.h diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h index 
114a2ad2f..114a2ad2f 100644 --- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h +++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h index c948043f4..c948043f4 100644 --- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h +++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h diff --git a/compiler/tflchef/tflite/src/Op/NotEqual.h b/compiler/tflchef/tflite/src/Op/include/NotEqual.h index b1febdcc5..b1febdcc5 100644 --- a/compiler/tflchef/tflite/src/Op/NotEqual.h +++ b/compiler/tflchef/tflite/src/Op/include/NotEqual.h diff --git a/compiler/tflchef/tflite/src/Op/OneHot.h b/compiler/tflchef/tflite/src/Op/include/OneHot.h index 50bbed095..50bbed095 100644 --- a/compiler/tflchef/tflite/src/Op/OneHot.h +++ b/compiler/tflchef/tflite/src/Op/include/OneHot.h diff --git a/compiler/tflchef/tflite/src/Op/PRelu.h b/compiler/tflchef/tflite/src/Op/include/PRelu.h index b35c6e7ce..b35c6e7ce 100644 --- a/compiler/tflchef/tflite/src/Op/PRelu.h +++ b/compiler/tflchef/tflite/src/Op/include/PRelu.h diff --git a/compiler/tflchef/tflite/src/Op/Pack.h b/compiler/tflchef/tflite/src/Op/include/Pack.h index 7779f64ed..7779f64ed 100644 --- a/compiler/tflchef/tflite/src/Op/Pack.h +++ b/compiler/tflchef/tflite/src/Op/include/Pack.h diff --git a/compiler/tflchef/tflite/src/Op/Pad.h b/compiler/tflchef/tflite/src/Op/include/Pad.h index 99998d418..99998d418 100644 --- a/compiler/tflchef/tflite/src/Op/Pad.h +++ b/compiler/tflchef/tflite/src/Op/include/Pad.h diff --git a/compiler/tflchef/tflite/src/Op/PadV2.h b/compiler/tflchef/tflite/src/Op/include/PadV2.h index 3aa474b92..3aa474b92 100644 --- a/compiler/tflchef/tflite/src/Op/PadV2.h +++ b/compiler/tflchef/tflite/src/Op/include/PadV2.h diff --git a/compiler/tflchef/tflite/src/Op/Pow.h b/compiler/tflchef/tflite/src/Op/include/Pow.h index 20e847377..20e847377 100644 --- 
a/compiler/tflchef/tflite/src/Op/Pow.h +++ b/compiler/tflchef/tflite/src/Op/include/Pow.h diff --git a/compiler/tflchef/tflite/src/Op/Quantize.h b/compiler/tflchef/tflite/src/Op/include/Quantize.h index 256ed5a5c..256ed5a5c 100644 --- a/compiler/tflchef/tflite/src/Op/Quantize.h +++ b/compiler/tflchef/tflite/src/Op/include/Quantize.h diff --git a/compiler/tflchef/tflite/src/Op/Range.h b/compiler/tflchef/tflite/src/Op/include/Range.h index ad10dc58b..ad10dc58b 100644 --- a/compiler/tflchef/tflite/src/Op/Range.h +++ b/compiler/tflchef/tflite/src/Op/include/Range.h diff --git a/compiler/tflchef/tflite/src/Op/Rank.h b/compiler/tflchef/tflite/src/Op/include/Rank.h index 003d9d310..003d9d310 100644 --- a/compiler/tflchef/tflite/src/Op/Rank.h +++ b/compiler/tflchef/tflite/src/Op/include/Rank.h diff --git a/compiler/tflchef/tflite/src/Op/ReLU.h b/compiler/tflchef/tflite/src/Op/include/ReLU.h index be1090270..be1090270 100644 --- a/compiler/tflchef/tflite/src/Op/ReLU.h +++ b/compiler/tflchef/tflite/src/Op/include/ReLU.h diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.h b/compiler/tflchef/tflite/src/Op/include/ReLU6.h index 64ddb6a2e..64ddb6a2e 100644 --- a/compiler/tflchef/tflite/src/Op/ReLU6.h +++ b/compiler/tflchef/tflite/src/Op/include/ReLU6.h diff --git a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h index 0767006af..0767006af 100644 --- a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h +++ b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceAny.h b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h index dd5e361d5..dd5e361d5 100644 --- a/compiler/tflchef/tflite/src/Op/ReduceAny.h +++ b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceMax.h b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h index 8e65cf47c..8e65cf47c 100644 --- a/compiler/tflchef/tflite/src/Op/ReduceMax.h +++ 
b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceMin.h b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h index 88cba6fe7..88cba6fe7 100644 --- a/compiler/tflchef/tflite/src/Op/ReduceMin.h +++ b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h diff --git a/compiler/tflchef/tflite/src/Op/ReduceProd.h b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h index e7766840a..e7766840a 100644 --- a/compiler/tflchef/tflite/src/Op/ReduceProd.h +++ b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h diff --git a/compiler/tflchef/tflite/src/Op/Reshape.h b/compiler/tflchef/tflite/src/Op/include/Reshape.h index be9fdac08..be9fdac08 100644 --- a/compiler/tflchef/tflite/src/Op/Reshape.h +++ b/compiler/tflchef/tflite/src/Op/include/Reshape.h diff --git a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h index 98c49c534..98c49c534 100644 --- a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h +++ b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h diff --git a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h index 5090bb938..5090bb938 100644 --- a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h +++ b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h diff --git a/compiler/tflchef/tflite/src/Op/ReverseSequence.h b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h index 8c8c811e4..8c8c811e4 100644 --- a/compiler/tflchef/tflite/src/Op/ReverseSequence.h +++ b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h diff --git a/compiler/tflchef/tflite/src/Op/ReverseV2.h b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h index 6a8a75e6b..6a8a75e6b 100644 --- a/compiler/tflchef/tflite/src/Op/ReverseV2.h +++ b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h diff --git a/compiler/tflchef/tflite/src/Op/Round.h b/compiler/tflchef/tflite/src/Op/include/Round.h index 
df0da3fa1..df0da3fa1 100644 --- a/compiler/tflchef/tflite/src/Op/Round.h +++ b/compiler/tflchef/tflite/src/Op/include/Round.h diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.h b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h index 5d68344c2..5d68344c2 100644 --- a/compiler/tflchef/tflite/src/Op/Rsqrt.h +++ b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/include/SVDF.h index a59ca54a2..a59ca54a2 100644 --- a/compiler/tflchef/tflite/src/Op/SVDF.h +++ b/compiler/tflchef/tflite/src/Op/include/SVDF.h diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.h b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h index 76362d775..76362d775 100644 --- a/compiler/tflchef/tflite/src/Op/ScatterNd.h +++ b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.h b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h index d20e63bd7..d20e63bd7 100644 --- a/compiler/tflchef/tflite/src/Op/SegmentSum.h +++ b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h diff --git a/compiler/tflchef/tflite/src/Op/Select.h b/compiler/tflchef/tflite/src/Op/include/Select.h index bf8e57d78..bf8e57d78 100644 --- a/compiler/tflchef/tflite/src/Op/Select.h +++ b/compiler/tflchef/tflite/src/Op/include/Select.h diff --git a/compiler/tflchef/tflite/src/Op/SelectV2.h b/compiler/tflchef/tflite/src/Op/include/SelectV2.h index ff03341d7..ff03341d7 100644 --- a/compiler/tflchef/tflite/src/Op/SelectV2.h +++ b/compiler/tflchef/tflite/src/Op/include/SelectV2.h diff --git a/compiler/tflchef/tflite/src/Op/Shape.h b/compiler/tflchef/tflite/src/Op/include/Shape.h index ebe1befb3..ebe1befb3 100644 --- a/compiler/tflchef/tflite/src/Op/Shape.h +++ b/compiler/tflchef/tflite/src/Op/include/Shape.h diff --git a/compiler/tflchef/tflite/src/Op/Sin.h b/compiler/tflchef/tflite/src/Op/include/Sin.h index 51eabceb5..51eabceb5 100644 --- a/compiler/tflchef/tflite/src/Op/Sin.h +++ 
b/compiler/tflchef/tflite/src/Op/include/Sin.h diff --git a/compiler/tflchef/tflite/src/Op/Slice.h b/compiler/tflchef/tflite/src/Op/include/Slice.h index 6ca6724d3..6ca6724d3 100644 --- a/compiler/tflchef/tflite/src/Op/Slice.h +++ b/compiler/tflchef/tflite/src/Op/include/Slice.h diff --git a/compiler/tflchef/tflite/src/Op/Softmax.h b/compiler/tflchef/tflite/src/Op/include/Softmax.h index cf168bdd9..cf168bdd9 100644 --- a/compiler/tflchef/tflite/src/Op/Softmax.h +++ b/compiler/tflchef/tflite/src/Op/include/Softmax.h diff --git a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h index 9d7bc44e8..9d7bc44e8 100644 --- a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h +++ b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h diff --git a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h index 784ad940a..784ad940a 100644 --- a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h +++ b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h diff --git a/compiler/tflchef/tflite/src/Op/SparseToDense.h b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h index 5ffe4789d..5ffe4789d 100644 --- a/compiler/tflchef/tflite/src/Op/SparseToDense.h +++ b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h diff --git a/compiler/tflchef/tflite/src/Op/Split.h b/compiler/tflchef/tflite/src/Op/include/Split.h index af247a1b9..af247a1b9 100644 --- a/compiler/tflchef/tflite/src/Op/Split.h +++ b/compiler/tflchef/tflite/src/Op/include/Split.h diff --git a/compiler/tflchef/tflite/src/Op/SplitV.h b/compiler/tflchef/tflite/src/Op/include/SplitV.h index 3f715b5f9..3f715b5f9 100644 --- a/compiler/tflchef/tflite/src/Op/SplitV.h +++ b/compiler/tflchef/tflite/src/Op/include/SplitV.h diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.h b/compiler/tflchef/tflite/src/Op/include/Sqrt.h index 9f0ad04ae..9f0ad04ae 100644 --- a/compiler/tflchef/tflite/src/Op/Sqrt.h +++ 
b/compiler/tflchef/tflite/src/Op/include/Sqrt.h diff --git a/compiler/tflchef/tflite/src/Op/Square.h b/compiler/tflchef/tflite/src/Op/include/Square.h index 9c008fe52..9c008fe52 100644 --- a/compiler/tflchef/tflite/src/Op/Square.h +++ b/compiler/tflchef/tflite/src/Op/include/Square.h diff --git a/compiler/tflchef/tflite/src/Op/SquaredDifference.h b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h index 58c2ed460..58c2ed460 100644 --- a/compiler/tflchef/tflite/src/Op/SquaredDifference.h +++ b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h diff --git a/compiler/tflchef/tflite/src/Op/Squeeze.h b/compiler/tflchef/tflite/src/Op/include/Squeeze.h index b6c89f73d..b6c89f73d 100644 --- a/compiler/tflchef/tflite/src/Op/Squeeze.h +++ b/compiler/tflchef/tflite/src/Op/include/Squeeze.h diff --git a/compiler/tflchef/tflite/src/Op/StridedSlice.h b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h index 98054b9b9..98054b9b9 100644 --- a/compiler/tflchef/tflite/src/Op/StridedSlice.h +++ b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h diff --git a/compiler/tflchef/tflite/src/Op/Sub.h b/compiler/tflchef/tflite/src/Op/include/Sub.h index 2168e5e0d..2168e5e0d 100644 --- a/compiler/tflchef/tflite/src/Op/Sub.h +++ b/compiler/tflchef/tflite/src/Op/include/Sub.h diff --git a/compiler/tflchef/tflite/src/Op/Sum.h b/compiler/tflchef/tflite/src/Op/include/Sum.h index 38eeb080d..38eeb080d 100644 --- a/compiler/tflchef/tflite/src/Op/Sum.h +++ b/compiler/tflchef/tflite/src/Op/include/Sum.h diff --git a/compiler/tflchef/tflite/src/Op/Tanh.h b/compiler/tflchef/tflite/src/Op/include/Tanh.h index 7339e4103..7339e4103 100644 --- a/compiler/tflchef/tflite/src/Op/Tanh.h +++ b/compiler/tflchef/tflite/src/Op/include/Tanh.h diff --git a/compiler/tflchef/tflite/src/Op/Tile.h b/compiler/tflchef/tflite/src/Op/include/Tile.h index 640f52a1f..640f52a1f 100644 --- a/compiler/tflchef/tflite/src/Op/Tile.h +++ b/compiler/tflchef/tflite/src/Op/include/Tile.h diff --git 
a/compiler/tflchef/tflite/src/Op/TopKV2.h b/compiler/tflchef/tflite/src/Op/include/TopKV2.h index b2b74cc75..b2b74cc75 100644 --- a/compiler/tflchef/tflite/src/Op/TopKV2.h +++ b/compiler/tflchef/tflite/src/Op/include/TopKV2.h diff --git a/compiler/tflchef/tflite/src/Op/Transpose.h b/compiler/tflchef/tflite/src/Op/include/Transpose.h index f0d944b6b..f0d944b6b 100644 --- a/compiler/tflchef/tflite/src/Op/Transpose.h +++ b/compiler/tflchef/tflite/src/Op/include/Transpose.h diff --git a/compiler/tflchef/tflite/src/Op/TransposeConv.h b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h index c79cdabd2..c79cdabd2 100644 --- a/compiler/tflchef/tflite/src/Op/TransposeConv.h +++ b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h diff --git a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h index cc4e5fb0f..cc4e5fb0f 100644 --- a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h +++ b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h diff --git a/compiler/tflchef/tflite/src/Op/Unique.h b/compiler/tflchef/tflite/src/Op/include/Unique.h index fae037c9f..fae037c9f 100644 --- a/compiler/tflchef/tflite/src/Op/Unique.h +++ b/compiler/tflchef/tflite/src/Op/include/Unique.h diff --git a/compiler/tflchef/tflite/src/Op/Unpack.h b/compiler/tflchef/tflite/src/Op/include/Unpack.h index 1036bdc14..1036bdc14 100644 --- a/compiler/tflchef/tflite/src/Op/Unpack.h +++ b/compiler/tflchef/tflite/src/Op/include/Unpack.h diff --git a/compiler/tflchef/tflite/src/Op/Where.h b/compiler/tflchef/tflite/src/Op/include/Where.h index 00cdc4b00..00cdc4b00 100644 --- a/compiler/tflchef/tflite/src/Op/Where.h +++ b/compiler/tflchef/tflite/src/Op/include/Where.h diff --git a/compiler/tflchef/tflite/src/Op/ZerosLike.h b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h index 163c1fa21..163c1fa21 100644 --- a/compiler/tflchef/tflite/src/Op/ZerosLike.h +++ 
b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h index b38b35a61..1b9d420e5 100644 --- a/compiler/tflchef/tflite/src/TFliteOpChefs.h +++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h @@ -18,115 +18,115 @@ #define __TFLITE_OP_CHEFS_H__ // In alphabet order -#include "Op/Abs.h" -#include "Op/Add.h" -#include "Op/AddN.h" -#include "Op/ArgMax.h" -#include "Op/ArgMin.h" -#include "Op/AveragePool2D.h" -#include "Op/BatchMatMul.h" -#include "Op/BatchToSpaceND.h" -#include "Op/BidirectionalSequenceLSTM.h" -#include "Op/Cast.h" -#include "Op/Ceil.h" -#include "Op/Concatenation.h" -#include "Op/Conv2D.h" -#include "Op/Cos.h" -#include "Op/DepthToSpace.h" -#include "Op/DepthwiseConv2D.h" -#include "Op/Dequantize.h" -#include "Op/Div.h" -#include "Op/ELU.h" -#include "Op/Equal.h" -#include "Op/Exp.h" -#include "Op/ExpandDims.h" -#include "Op/FakeQuant.h" -#include "Op/Fill.h" -#include "Op/Floor.h" -#include "Op/FloorDiv.h" -#include "Op/FloorMod.h" -#include "Op/FullyConnected.h" -#include "Op/Gather.h" -#include "Op/GatherNd.h" -#include "Op/Greater.h" -#include "Op/GreaterEqual.h" -#include "Op/L2Normalize.h" -#include "Op/L2Pool2D.h" -#include "Op/LeakyRelu.h" -#include "Op/Less.h" -#include "Op/LessEqual.h" -#include "Op/LocalResponseNormalization.h" -#include "Op/Log.h" -#include "Op/LogicalAnd.h" -#include "Op/LogicalNot.h" -#include "Op/LogicalOr.h" -#include "Op/Logistic.h" -#include "Op/LogSoftmax.h" -#include "Op/MatrixDiag.h" -#include "Op/MatrixSetDiag.h" -#include "Op/Maximum.h" -#include "Op/MaxPool2D.h" -#include "Op/Mean.h" -#include "Op/Minimum.h" -#include "Op/MirrorPad.h" -#include "Op/Mul.h" -#include "Op/Neg.h" -#include "Op/NonMaxSuppressionV4.h" -#include "Op/NonMaxSuppressionV5.h" -#include "Op/NotEqual.h" -#include "Op/OneHot.h" -#include "Op/Pack.h" -#include "Op/Pad.h" -#include "Op/PadV2.h" -#include "Op/Pow.h" -#include "Op/PRelu.h" -#include 
"Op/Quantize.h" -#include "Op/Range.h" -#include "Op/Rank.h" -#include "Op/ReduceAny.h" -#include "Op/ReduceMax.h" -#include "Op/ReduceMin.h" -#include "Op/ReduceProd.h" -#include "Op/ReLU.h" -#include "Op/ReLU6.h" -#include "Op/ReLUN1To1.h" -#include "Op/Reshape.h" -#include "Op/ResizeBilinear.h" -#include "Op/ResizeNearestNeighbor.h" -#include "Op/ReverseSequence.h" -#include "Op/ReverseV2.h" -#include "Op/Round.h" -#include "Op/Rsqrt.h" -#include "Op/ScatterNd.h" -#include "Op/SegmentSum.h" -#include "Op/Select.h" -#include "Op/SelectV2.h" -#include "Op/Shape.h" -#include "Op/Sin.h" -#include "Op/Slice.h" -#include "Op/Softmax.h" -#include "Op/SpaceToBatchND.h" -#include "Op/SpaceToDepth.h" -#include "Op/SparseToDense.h" -#include "Op/Split.h" -#include "Op/SplitV.h" -#include "Op/Sqrt.h" -#include "Op/Square.h" -#include "Op/SquaredDifference.h" -#include "Op/Squeeze.h" -#include "Op/StridedSlice.h" -#include "Op/Sub.h" -#include "Op/Sum.h" -#include "Op/SVDF.h" -#include "Op/Tanh.h" -#include "Op/Tile.h" -#include "Op/TopKV2.h" -#include "Op/Transpose.h" -#include "Op/TransposeConv.h" -#include "Op/UnidirectionalSequenceLSTM.h" -#include "Op/Unique.h" -#include "Op/Unpack.h" -#include "Op/Where.h" -#include "Op/ZerosLike.h" +#include "Op/include/Abs.h" +#include "Op/include/Add.h" +#include "Op/include/AddN.h" +#include "Op/include/ArgMax.h" +#include "Op/include/ArgMin.h" +#include "Op/include/AveragePool2D.h" +#include "Op/include/BatchMatMul.h" +#include "Op/include/BatchToSpaceND.h" +#include "Op/include/BidirectionalSequenceLSTM.h" +#include "Op/include/Cast.h" +#include "Op/include/Ceil.h" +#include "Op/include/Concatenation.h" +#include "Op/include/Conv2D.h" +#include "Op/include/Cos.h" +#include "Op/include/DepthToSpace.h" +#include "Op/include/DepthwiseConv2D.h" +#include "Op/include/Dequantize.h" +#include "Op/include/Div.h" +#include "Op/include/ELU.h" +#include "Op/include/Equal.h" +#include "Op/include/Exp.h" +#include "Op/include/ExpandDims.h" 
+#include "Op/include/FakeQuant.h" +#include "Op/include/Fill.h" +#include "Op/include/Floor.h" +#include "Op/include/FloorDiv.h" +#include "Op/include/FloorMod.h" +#include "Op/include/FullyConnected.h" +#include "Op/include/Gather.h" +#include "Op/include/GatherNd.h" +#include "Op/include/Greater.h" +#include "Op/include/GreaterEqual.h" +#include "Op/include/L2Normalize.h" +#include "Op/include/L2Pool2D.h" +#include "Op/include/LeakyRelu.h" +#include "Op/include/Less.h" +#include "Op/include/LessEqual.h" +#include "Op/include/LocalResponseNormalization.h" +#include "Op/include/Log.h" +#include "Op/include/LogicalAnd.h" +#include "Op/include/LogicalNot.h" +#include "Op/include/LogicalOr.h" +#include "Op/include/Logistic.h" +#include "Op/include/LogSoftmax.h" +#include "Op/include/MatrixDiag.h" +#include "Op/include/MatrixSetDiag.h" +#include "Op/include/Maximum.h" +#include "Op/include/MaxPool2D.h" +#include "Op/include/Mean.h" +#include "Op/include/Minimum.h" +#include "Op/include/MirrorPad.h" +#include "Op/include/Mul.h" +#include "Op/include/Neg.h" +#include "Op/include/NonMaxSuppressionV4.h" +#include "Op/include/NonMaxSuppressionV5.h" +#include "Op/include/NotEqual.h" +#include "Op/include/OneHot.h" +#include "Op/include/Pack.h" +#include "Op/include/Pad.h" +#include "Op/include/PadV2.h" +#include "Op/include/Pow.h" +#include "Op/include/PRelu.h" +#include "Op/include/Quantize.h" +#include "Op/include/Range.h" +#include "Op/include/Rank.h" +#include "Op/include/ReduceAny.h" +#include "Op/include/ReduceMax.h" +#include "Op/include/ReduceMin.h" +#include "Op/include/ReduceProd.h" +#include "Op/include/ReLU.h" +#include "Op/include/ReLU6.h" +#include "Op/include/ReLUN1To1.h" +#include "Op/include/Reshape.h" +#include "Op/include/ResizeBilinear.h" +#include "Op/include/ResizeNearestNeighbor.h" +#include "Op/include/ReverseSequence.h" +#include "Op/include/ReverseV2.h" +#include "Op/include/Round.h" +#include "Op/include/Rsqrt.h" +#include "Op/include/ScatterNd.h" 
+#include "Op/include/SegmentSum.h" +#include "Op/include/Select.h" +#include "Op/include/SelectV2.h" +#include "Op/include/Shape.h" +#include "Op/include/Sin.h" +#include "Op/include/Slice.h" +#include "Op/include/Softmax.h" +#include "Op/include/SpaceToBatchND.h" +#include "Op/include/SpaceToDepth.h" +#include "Op/include/SparseToDense.h" +#include "Op/include/Split.h" +#include "Op/include/SplitV.h" +#include "Op/include/Sqrt.h" +#include "Op/include/Square.h" +#include "Op/include/SquaredDifference.h" +#include "Op/include/Squeeze.h" +#include "Op/include/StridedSlice.h" +#include "Op/include/Sub.h" +#include "Op/include/Sum.h" +#include "Op/include/SVDF.h" +#include "Op/include/Tanh.h" +#include "Op/include/Tile.h" +#include "Op/include/TopKV2.h" +#include "Op/include/Transpose.h" +#include "Op/include/TransposeConv.h" +#include "Op/include/UnidirectionalSequenceLSTM.h" +#include "Op/include/Unique.h" +#include "Op/include/Unpack.h" +#include "Op/include/Where.h" +#include "Op/include/ZerosLike.h" #endif // __TFLITE_OP_CHEFS_H__ diff --git a/compiler/tflchef/tools/file/Driver.cpp b/compiler/tflchef/tools/file/Driver.cpp index d4605ced3..f6c6789bd 100644 --- a/compiler/tflchef/tools/file/Driver.cpp +++ b/compiler/tflchef/tools/file/Driver.cpp @@ -28,10 +28,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("recipe") - .type(arser::DataType::STR) - .help("Source recipe file path to convert"); - arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path"); + arser.add_argument("recipe").help("Source recipe file path to convert"); + arser.add_argument("tflite").help("Target tflite file path"); try { diff --git a/compiler/tflchef/tools/reverse/Driver.cpp b/compiler/tflchef/tools/reverse/Driver.cpp index 1451e8bb8..119bee6be 100644 --- a/compiler/tflchef/tools/reverse/Driver.cpp +++ b/compiler/tflchef/tools/reverse/Driver.cpp @@ -25,10 +25,8 @@ int entry(int argc, char **argv) { arser::Arser arser; - 
arser.add_argument("tflite") - .type(arser::DataType::STR) - .help("Source tflite file path to convert"); - arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path"); + arser.add_argument("tflite").help("Source tflite file path to convert"); + arser.add_argument("recipe").help("Target recipe file path"); try { diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt index fac0be6bf..410232645 100644 --- a/compiler/tfldump/CMakeLists.txt +++ b/compiler/tfldump/CMakeLists.txt @@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(tfldump ${DRIVER} ${SOURCES}) target_include_directories(tfldump PRIVATE include) target_link_libraries(tfldump arser) +target_link_libraries(tfldump foder) target_link_libraries(tfldump mio_tflite280) target_link_libraries(tfldump mio_tflite280_helper) target_link_libraries(tfldump safemain) diff --git a/compiler/tfldump/driver/Driver.cpp b/compiler/tfldump/driver/Driver.cpp index 38c9c062f..a3e748be1 100644 --- a/compiler/tfldump/driver/Driver.cpp +++ b/compiler/tfldump/driver/Driver.cpp @@ -15,7 +15,7 @@ */ #include <arser/arser.h> -#include <tflread/Model.h> +#include <foder/FileLoader.h> #include <tfldump/Dump.h> #include <iostream> @@ -23,7 +23,7 @@ int entry(int argc, char **argv) { arser::Arser arser; - arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to dump"); + arser.add_argument("tflite").help("TFLite file to dump"); try { @@ -38,14 +38,9 @@ int entry(int argc, char **argv) std::string tflite_path = arser.get<std::string>("tflite"); // Load TF lite model from a tflite file - std::unique_ptr<tflread::Model> model = tflread::load_tflite(tflite_path); - if (model == nullptr) - { - std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl; - return 255; - } - - const tflite::Model *tflmodel = model->model(); + foder::FileLoader fileLoader{tflite_path}; + std::vector<char> modelData = fileLoader.load(); + const 
tflite::Model *tflmodel = tflite::GetModel(modelData.data()); if (tflmodel == nullptr) { std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl; diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake index b1abf9486..a11f6b200 100644 --- a/compiler/tfldump/requires.cmake +++ b/compiler/tfldump/requires.cmake @@ -1,3 +1,4 @@ require("arser") +require("foder") require("mio-tflite280") require("safemain") diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 2a87e47d7..4388fcde8 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -33,7 +33,7 @@ void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t am std::ios_base::fmtflags saveflags(os.flags()); bool second = false; - bool ellipsis = amount > 0 && size > 4; + bool ellipsis = amount > 0 && size > 8; size_t count = ellipsis ? std::min(size, amount) : size; for (size_t i = 0; i < count; i++) @@ -103,8 +103,8 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect) if (fbvect == nullptr) return os; - bool ellipsis = (fbvect->size() > 4); - auto limit_size = ellipsis ? 4 : fbvect->size(); + bool ellipsis = (fbvect->size() > 8); + auto limit_size = ellipsis ? 8 : fbvect->size(); if (ellipsis) { diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp deleted file mode 100644 index d2f6e06f1..000000000 --- a/compiler/tfldump/src/Load.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <tflread/Model.h> - -#include <fcntl.h> -#include <unistd.h> -#include <sys/stat.h> -#include <sys/mman.h> - -namespace -{ - -class MemoryMappedModel final : public tflread::Model -{ -public: - /** - * @require fd and data SHOULD be valid - */ - explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size} - { - // DO NOTHING - } - -public: - ~MemoryMappedModel() - { - munmap(_data, _size); - close(_fd); - } - -public: - MemoryMappedModel(const MemoryMappedModel &) = delete; - MemoryMappedModel(MemoryMappedModel &&) = delete; - -public: - const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); } - -private: - int _fd = -1; - void *_data = nullptr; - size_t _size = 0; -}; - -class FileDescriptor final -{ -public: - FileDescriptor(int value) : _value{value} - { - // DO NOTHING - } - -public: - // NOTE Copy is not allowed - FileDescriptor(const FileDescriptor &) = delete; - -public: - // NOTE Move is allowed - FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); } - -public: - ~FileDescriptor() - { - if (_value != -1) - { - // Close on destructor - close(_value); - } - } - -public: - int value(void) const { return _value; } - -public: - int release(void) - { - auto res = _value; - _value = -1; - return res; - } - -private: - int _value = -1; -}; - -} // namespace - -namespace tflread -{ - -std::unique_ptr<Model> load_tflite(const std::string &path) -{ - FileDescriptor fd = open(path.c_str(), O_RDONLY); - - if (fd.value() == -1) - { - // Return nullptr on open 
failure - return nullptr; - } - - struct stat st; - if (fstat(fd.value(), &st) == -1) - { - // Return nullptr on fstat failure - return nullptr; - } - - auto size = st.st_size; - auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0); - - if (data == MAP_FAILED) - { - // Return nullptr on mmap failure - return nullptr; - } - - return std::unique_ptr<tflread::Model>{new MemoryMappedModel(fd.release(), data, size)}; -} - -} // namespace tflread diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp index 47edcb086..2e8e7134f 100644 --- a/compiler/tfldump/src/OpPrinter.cpp +++ b/compiler/tfldump/src/OpPrinter.cpp @@ -736,6 +736,7 @@ OpPrinterRegistry::OpPrinterRegistry() // There is no Option for CEIL _op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>(); _op_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>(); + // There is no Option for DENSIFY _op_map[tflite::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>(); _op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>(); // There is no Option for DEQUANTIZE diff --git a/compiler/tflite2circle-conversion-test/CMakeLists.txt b/compiler/tflite2circle-conversion-test/CMakeLists.txt index 83fe23a8f..2e67d48bd 100644 --- a/compiler/tflite2circle-conversion-test/CMakeLists.txt +++ b/compiler/tflite2circle-conversion-test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + nnas_include(TargetRequire) unset(REQUIRED_TARGETS) diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp index fb8c211b6..6afe1b0f2 100644 --- a/compiler/tflite2circle/driver/Driver.cpp +++ b/compiler/tflite2circle/driver/Driver.cpp @@ -36,24 +36,11 @@ int entry(int argc, char **argv) { arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"}; - arser.add_argument("--version") - .nargs(0) - .required(false) - 
.default_value(false) - .help("Show version information and exit") - .exit_with(print_version); - - arser.add_argument("-V", "--verbose") - .nargs(0) - .required(false) - .default_value(false) - .help("output additional information to stdout or stderr"); - - arser.add_argument("tflite") - .nargs(1) - .type(arser::DataType::STR) - .help("Source tflite file path to convert"); - arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path"); + arser::Helper::add_version(arser, print_version); + arser::Helper::add_verbose(arser); + + arser.add_argument("tflite").help("Source tflite file path to convert"); + arser.add_argument("circle").help("Target circle file path"); try { diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h index 88a4f71df..8149197f6 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions.h +++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h @@ -31,8 +31,10 @@ #include "BuildBuiltinOptions/ConcatenationOptions.h" #include "BuildBuiltinOptions/Conv2DOptions.h" #include "BuildBuiltinOptions/CosOptions.h" +#include "BuildBuiltinOptions/DensifyOptions.h" #include "BuildBuiltinOptions/DepthToSpaceOptions.h" #include "BuildBuiltinOptions/DepthwiseConv2DOptions.h" +#include "BuildBuiltinOptions/DequantizeOptions.h" #include "BuildBuiltinOptions/DivOptions.h" #include "BuildBuiltinOptions/EqualOptions.h" #include "BuildBuiltinOptions/ExpandDimsOptions.h" diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp new file mode 100644 index 000000000..4e5863576 --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DensifyOptions.h" + +namespace tflite2circle +{ + +flatbuffers::Offset<circle::DensifyOptions> +build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *) +{ + circle::DensifyOptionsBuilder builtin_options_builder{fb}; + return builtin_options_builder.Finish(); +} + +} // namespace tflite2circle diff --git a/compiler/tfldump/include/tflread/Model.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h index c6e4a94ac..b6126c4e2 100644 --- a/compiler/tfldump/include/tflread/Model.h +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,30 +14,18 @@ * limitations under the License. 
*/ -#ifndef __TFLREAD_MODEL_H__ -#define __TFLREAD_MODEL_H__ +#ifndef __BBO_DENSIFY_OPTIONS_H__ +#define __BBO_DENSIFY_OPTIONS_H__ #include <mio/tflite/schema_generated.h> +#include <mio/circle/schema_generated.h> -#include <memory> - -namespace tflread -{ - -struct Model +namespace tflite2circle { - virtual ~Model() = default; - virtual const ::tflite::Model *model(void) const = 0; -}; - -/** - * @brief Load TensorFlow Lite model (as a raw Model) from a given path - * - * @note May return a nullptr - */ -std::unique_ptr<Model> load_tflite(const std::string &path); +flatbuffers::Offset<circle::DensifyOptions> +build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op); -} // namespace tflread +} // namespace tflite2circle -#endif // __TFLREAD_MODEL_H__ +#endif // __BBO_DENSIFY_OPTIONS_H__ diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp new file mode 100644 index 000000000..eeacece6a --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DequantizeOptions.h" +#include "DataLookup.h" + +namespace tflite2circle +{ + +flatbuffers::Offset<circle::DequantizeOptions> +build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op) +{ + circle::DequantizeOptionsBuilder builtin_options_builder{fb}; + return builtin_options_builder.Finish(); +} + +} // namespace tflite2circle diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h new file mode 100644 index 000000000..1cb9f9c1a --- /dev/null +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BBO_DEQUANTIZE_OPTIONS_H__ +#define __BBO_DEQUANTIZE_OPTIONS_H__ + +#include <mio/tflite/schema_generated.h> +#include <mio/circle/schema_generated.h> + +namespace tflite2circle +{ + +flatbuffers::Offset<circle::DequantizeOptions> +build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op); + +} // namespace tflite2circle + +#endif // __BBO_DEQUANTIZE_OPTIONS_H__ diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp index d2d2888f2..db88d3e82 100644 --- a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp +++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp @@ -25,8 +25,6 @@ namespace tflite2circle flatbuffers::Offset<circle::MaximumMinimumOptions> build_circle_MaximumMinimumOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op) { - auto tflite_builtin_options = op->builtin_options_as_MaximumMinimumOptions(); - assert(tflite_builtin_options); circle::MaximumMinimumOptionsBuilder builtin_options_builder{fb}; return builtin_options_builder.Finish(); } diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp index d483b288f..ac017b8f1 100644 --- a/compiler/tflite2circle/src/CircleModel.cpp +++ b/compiler/tflite2circle/src/CircleModel.cpp @@ -344,8 +344,13 @@ template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_fla circle::OperatorCodeBuilder operator_code_builder{*_fb}; auto de_code = it->deprecated_builtin_code(); auto bt_code = it->builtin_code(); - operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code)); - operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code)); + auto cir_de_code = get_circle_builtin_code(de_code); + auto cir_bt_code = get_circle_builtin_code(bt_code); + // correct bt_code where bt_code == 0 for old tflite format + 
if (cir_bt_code == 0) + cir_bt_code = static_cast<circle::BuiltinOperator>(cir_de_code); + operator_code_builder.add_deprecated_builtin_code(cir_de_code); + operator_code_builder.add_builtin_code(cir_bt_code); operator_code_builder.add_custom_code(custom_code); operator_code_builder.add_version(it->version()); auto code = operator_code_builder.Finish(); diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst index d55ba464a..9cbf8032a 100644 --- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst +++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst @@ -42,7 +42,7 @@ TFL_BUILTIN_OPTIONS(TopKV2Options) TFL_BUILTIN_OPTIONS(SplitOptions) TFL_BUILTIN_OPTIONS(LogSoftmaxOptions) TFL_BUILTIN_OPTIONS(CastOptions) -//TFL_BUILTIN_OPTIONS(DequantizeOptions) +TFL_BUILTIN_OPTIONS(DequantizeOptions) TFL_BUILTIN_OPTIONS(MaximumMinimumOptions) TFL_BUILTIN_OPTIONS(ArgMaxOptions) TFL_BUILTIN_OPTIONS(LessOptions) @@ -106,3 +106,4 @@ TFL_BUILTIN_OPTIONS(RankOptions) TFL_BUILTIN_OPTIONS(ScatterNdOptions) TFL_BUILTIN_OPTIONS(SegmentSumOptions) TFL_BUILTIN_OPTIONS(BatchMatMulOptions) +TFL_BUILTIN_OPTIONS(DensifyOptions) diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 3841a1b78..93c33cdbd 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x0000000000140001) + set(VCONONE_VERSION 0x0000000000150001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be cached # you may have to remove cache file if you remove -D option diff --git a/compiler/vconone/src/version.cpp b/compiler/vconone/src/version.cpp index d94a7ada6..cebf7d998 100644 --- a/compiler/vconone/src/version.cpp +++ b/compiler/vconone/src/version.cpp @@ -54,7 +54,7 @@ std::string get_string(void) std::string get_copyright(void) { std::string str; - str = "Copyright (c) 2020-2021 Samsung Electronics Co., 
Ltd. All Rights Reserved\r\n"; + str = "Copyright (c) 2020-2022 Samsung Electronics Co., Ltd. All Rights Reserved\r\n"; str += "Licensed under the Apache License, Version 2.0\r\n"; str += "https://github.com/Samsung/ONE"; return str; |