diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2021-10-19 11:32:46 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2021-10-19 11:32:46 +0900 |
commit | 33ae5d70a1ed85d215c1293ed63afbf3517b07d5 (patch) | |
tree | 9f1ace0f4760a8f7903ef15e2e92f1d1401e4b1e | |
parent | f4cf19e579a19c5346ccb2aad55bfd251065e447 (diff) | |
download | nnfw-33ae5d70a1ed85d215c1293ed63afbf3517b07d5.tar.gz nnfw-33ae5d70a1ed85d215c1293ed63afbf3517b07d5.tar.bz2 nnfw-33ae5d70a1ed85d215c1293ed63afbf3517b07d5.zip |
Imported Upstream version 1.18.0 [tags: upstream/1.18.0, submit/tizen/20211028.014856, submit/tizen/20211019.023737, accepted/tizen/unified/20211101.140244]
504 files changed, 17146 insertions, 3618 deletions
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml index 9c0a8d881..86d272d8a 100644 --- a/.ahub/tcchecker-tca/config.yaml +++ b/.ahub/tcchecker-tca/config.yaml @@ -25,21 +25,6 @@ test: any: true - extension: cc any: true - - excludes : - - DepthwiseConv2D.cc - - ArgMinMax.cc - - AveragePool2D.cc - - Concat.cc - - DepthToSpace.cc - - DepthwiseConv2D.cc - - Fill.cc - - If.cc - - Pad.cc - - Reduce.cc - - ResizeBilinear.c - - Slice.cc - - Softmax.cc - - While.cc testCase: - condition: - functionName: diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h index f2a7a2b85..1703e421e 100644 --- a/compiler/arser/include/arser/arser.h +++ b/compiler/arser/include/arser/arser.h @@ -238,6 +238,18 @@ public: return *this; } + Argument &accumulated(void) + { + _is_accumulated = true; + return *this; + } + + Argument &accumulated(bool value) + { + _is_accumulated = value; + return *this; + } + Argument &help(std::string help_message) { _help_message = help_message; @@ -296,7 +308,9 @@ private: std::function<void(void)> _func; uint32_t _nargs{1}; bool _is_required{false}; + bool _is_accumulated{false}; std::vector<std::string> _values; + std::vector<std::vector<std::string>> _accum_values; friend class Arser; friend std::ostream &operator<<(std::ostream &, const Arser &); @@ -403,6 +417,8 @@ public: throw std::runtime_error("Invalid arguments. Positional argument must always be required."); } } + // TODO accumulated arguments shouldn't be enabled to positional arguments. + // TODO accumulated arguments shouldn't be enabled to optional arguments whose `narg` == 0. 
} void parse(int argc, char **argv) @@ -475,6 +491,11 @@ public: "You must have missed some argument."); arg->second->_values.emplace_back(argv[c++]); } + // accumulate values + if (arg->second->_is_accumulated) + { + arg->second->_accum_values.emplace_back(arg->second->_values); + } if (arg->second->_nargs == 0) { // TODO std::boolalpha for true or false @@ -493,6 +514,9 @@ public: if (arg == _arg_map.end()) return false; + if (arg->second->_is_accumulated) + return arg->second->_accum_values.size() > 0 ? true : false; + return arg->second->_values.size() > 0 ? true : false; } @@ -500,6 +524,9 @@ public: template <typename T> std::vector<T> get_impl(const std::string &arg_name, std::vector<T> *); + template <typename T> + std::vector<std::vector<T>> get_impl(const std::string &arg_name, std::vector<std::vector<T>> *); + template <typename T> T get(const std::string &arg_name); friend std::ostream &operator<<(std::ostream &stream, const Arser &parser) @@ -617,6 +644,12 @@ template <typename T> T Arser::get_impl(const std::string &arg_name, T *) "There is no argument you are looking for: " + arg_name); + if (arg->second->_is_accumulated) + throw std::runtime_error( + "Type mismatch. " + "You called get using a type different from the one you specified." + "Accumulated argument is returned as std::vector of the specified type"); + if (arg->second->_type != TypeName<T>::Get()) throw std::runtime_error("Type mismatch. " "You called get() method with a type different " @@ -640,6 +673,22 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name "There is no argument you are looking for: " + arg_name); + // Accumulated arguments with scalar type (e.g., STR) + if (arg->second->_is_accumulated) + { + if (arg->second->_type != TypeName<T>::Get()) + throw std::runtime_error("Type mismatch. 
" + "You called get using a type different from the one you specified."); + + std::vector<T> data; + for (auto values : arg->second->_accum_values) + { + assert(values.size() == 1); + data.emplace_back(internal::lexical_cast<T>(values[0])); + } + return data; + } + if (arg->second->_type != TypeName<std::vector<T>>::Get()) throw std::runtime_error("Type mismatch. " "You called get using a type different from the one you specified."); @@ -650,6 +699,39 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name return data; } +// Accumulated arguments with vector type (e.g., STR_VEC) +template <typename T> +std::vector<std::vector<T>> Arser::get_impl(const std::string &arg_name, + std::vector<std::vector<T>> *) +{ + auto arg = _arg_map.find(arg_name); + if (arg == _arg_map.end()) + throw std::runtime_error("Invalid argument. " + "There is no argument you are looking for: " + + arg_name); + + if (not arg->second->_is_accumulated) + throw std::runtime_error("Type mismatch. " + "You called get using a type different from the one you specified."); + + if (arg->second->_type != TypeName<std::vector<T>>::Get()) + throw std::runtime_error( + "Type mismatch. " + "You called get using a type different from the one you specified." 
+ "Accumulated argument is returned as std::vector of the specified type"); + + std::vector<std::vector<T>> result; + for (auto values : arg->second->_accum_values) + { + std::vector<T> data; + std::transform(values.begin(), values.end(), std::back_inserter(data), + [](std::string str) -> T { return internal::lexical_cast<T>(str); }); + result.emplace_back(data); + } + + return result; +} + template <typename T> T Arser::get(const std::string &arg_name) { return get_impl(arg_name, static_cast<T *>(nullptr)); diff --git a/compiler/arser/tests/arser.test.cpp b/compiler/arser/tests/arser.test.cpp index b37d0dec3..4e88f0cb7 100644 --- a/compiler/arser/tests/arser.test.cpp +++ b/compiler/arser/tests/arser.test.cpp @@ -93,7 +93,7 @@ TEST(BasicTest, OptionalArgument) EXPECT_THROW(arser.get<bool>("--volume"), std::runtime_error); } -TEST(BasicTest, NonRequiredOptionalArgument) +TEST(BasicTest, NonRequiredOptionalArgument_NEG) { /* arrange */ Arser arser; @@ -111,7 +111,7 @@ TEST(BasicTest, NonRequiredOptionalArgument) EXPECT_THROW(arser.get<int>("--weight"), std::runtime_error); } -TEST(BasicTest, RequiredOptionalArgument) +TEST(BasicTest, RequiredOptionalArgument_NEG) { /* arrange */ Arser arser; @@ -395,7 +395,7 @@ TEST(BasicTest, shortMultipleOption) EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path")); } -TEST(BasicTest, OptWithRequiredDuplicate) +TEST(BasicTest, OptWithRequiredDuplicate_NEG) { /* arrange */ Arser arser; @@ -441,3 +441,61 @@ TEST(BasicTest, OptWithNonRequiredDuplicate) EXPECT_TRUE(arser["--output_path"]); EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path")); } + +TEST(BasicTest, AccumulateVectorOptions) +{ + /* arrange */ + Arser arser; + + arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC); + + Prompt prompt("./driver --specify a b c --specify 1 2 3"); + /* act */ + arser.parse(prompt.argc(), prompt.argv()); + /* assert */ + EXPECT_TRUE(arser["--specify"]); + + auto specify = 
arser.get<std::vector<std::vector<std::string>>>("--specify"); + auto first = specify[0]; + EXPECT_EQ("a", first.at(0)); + EXPECT_EQ("b", first.at(1)); + EXPECT_EQ("c", first.at(2)); + auto second = specify[1]; + EXPECT_EQ("1", second.at(0)); + EXPECT_EQ("2", second.at(1)); + EXPECT_EQ("3", second.at(2)); +} + +TEST(BasicTest, AccumulateScalarOptions) +{ + /* arrange */ + Arser arser; + + arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT); + + Prompt prompt("./driver --specify 1 --specify 2"); + /* act */ + arser.parse(prompt.argc(), prompt.argv()); + /* assert */ + EXPECT_TRUE(arser["--specify"]); + + auto specify = arser.get<std::vector<float>>("--specify"); + EXPECT_EQ(1, specify.at(0)); + EXPECT_EQ(2, specify.at(1)); +} + +TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG) +{ + /* arrange */ + Arser arser; + + arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT); + + Prompt prompt("./driver --specify 1 --specify 2"); + /* act */ + arser.parse(prompt.argc(), prompt.argv()); + /* assert */ + EXPECT_TRUE(arser["--specify"]); + + EXPECT_THROW(arser.get<float>("--specify"), std::runtime_error); +} diff --git a/compiler/circle-opselector/CMakeLists.txt b/compiler/circle-opselector/CMakeLists.txt new file mode 100644 index 000000000..93ab84c09 --- /dev/null +++ b/compiler/circle-opselector/CMakeLists.txt @@ -0,0 +1,36 @@ +set(DRIVER "driver/Driver.cpp") + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_executable(circle-opselector ${DRIVER} ${SOURCES}) +target_include_directories(circle-opselector PRIVATE src) +target_link_libraries(circle-opselector foder) +target_link_libraries(circle-opselector safemain) +target_link_libraries(circle-opselector loco) +target_link_libraries(circle-opselector luci_import) +target_link_libraries(circle-opselector luci_export) +target_link_libraries(circle-opselector arser) 
+target_link_libraries(circle-opselector vconone) +target_link_libraries(circle-opselector luci_service) +target_link_libraries(circle-opselector luci_profile) + +install(TARGETS circle-opselector DESTINATION bin) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(circle-opselector-test ${TESTS} ${SOURCES} ${DRIVER}) +target_include_directories(circle-opselector-test PRIVATE src) +target_link_libraries(circle-opselector-test foder) +target_link_libraries(circle-opselector-test loco) +target_link_libraries(circle-opselector-test luci_import) +target_link_libraries(circle-opselector-test luci_export) +target_link_libraries(circle-opselector-test arser) +target_link_libraries(circle-opselector-test vconone) +target_link_libraries(circle-opselector-test luci_service) +target_link_libraries(circle-opselector-test luci_profile) diff --git a/compiler/circle-opselector/README.md b/compiler/circle-opselector/README.md new file mode 100644 index 000000000..c06899ab5 --- /dev/null +++ b/compiler/circle-opselector/README.md @@ -0,0 +1,21 @@ +# circle-opselector
+
+`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
+
+## Example
+
+### 1. Select from location numbers
+
+```bash
+./circle-opselector --by_id "1-3,5" input.circle output.circle
+```
+
+Then `output.circle`, which contains only nodes 1, 2, 3 and 5, will be created.
+
+### 2. Select from node names
+
+```bash
+./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
+```
+
+Then `output.circle`, which contains only the nodes Add_1, Sub_1 and Concat_2, will be created.
diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp new file mode 100644 index 000000000..a1ace4f58 --- /dev/null +++ b/compiler/circle-opselector/driver/Driver.cpp @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ModuleIO.h" + +#include <luci/Profile/CircleNodeID.h> + +#include <arser/arser.h> +#include <vconone/vconone.h> + +#include <iostream> +#include <string> +#include <vector> +#include <algorithm> +#include <cctype> +#include <numeric> +#include <sstream> + +void print_version(void) +{ + std::cout << "circle-opselector version " << vconone::get_string() << std::endl; + std::cout << vconone::get_copyright() << std::endl; +} + +std::vector<std::string> split_into_vector(const std::string &str, const char &delim) +{ + std::vector<std::string> ret; + std::istringstream is(str); + for (std::string item; std::getline(is, item, delim);) + { + ret.push_back(item); + } + + // remove empty string + ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }), + ret.end()); + + return ret; +} + +bool is_number(const std::string &s) +{ + return !s.empty() && std::find_if(s.begin(), s.end(), + [](unsigned char c) { return !std::isdigit(c); }) == s.end(); +} + +bool is_number(const std::vector<std::string> &vec) +{ + for (const auto &s : vec) + { + if 
(not::is_number(s)) + { + return false; + } + } + return true; +} + +/** + * @brief Segmentation function for user's '--by_id' input + * + * @note This function tokenizes the input data.s + * First, divide it into ',', and if token has '-', devide it once more into '-'. + * For example, if user input is '12,34,56', it is devided into [12,34,56]. + * If input is '1-2,34,56', it is devided into [[1,2],34,56]. + * And '-' means range so, if input is '2-7', it means all integer between 2-7. + */ +std::vector<uint32_t> split_id_input(const std::string &str) +{ + std::vector<uint32_t> by_id; + + // tokenize colon-separated string + auto colon_tokens = ::split_into_vector(str, ','); + if (colon_tokens.empty()) // input empty line like "". + { + std::cerr << "ERROR: Nothing was entered." << std::endl; + exit(EXIT_FAILURE); + } + for (const auto &ctok : colon_tokens) + { + auto dash_tokens = ::split_into_vector(ctok, '-'); + if (not::is_number(dash_tokens)) + { + std::cerr << "ERROR: To select operator by id, please use these args: [0-9], '-', ','" + << std::endl; + exit(EXIT_FAILURE); + } + // convert string into integer + std::vector<uint32_t> int_tokens; + try + { + std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens), + [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); }); + } + catch (const std::out_of_range &) + { + // if input is big integer like '123467891234', stoi throw this exception. + std::cerr << "ERROR: Argument is out of range." << std::endl; + exit(EXIT_FAILURE); + } + catch (...) 
+ { + std::cerr << "ERROR: Unknown error" << std::endl; + exit(EXIT_FAILURE); + } + + switch (int_tokens.size()) + { + case 0: // inputs like "-" + { + std::cerr << "ERROR: Nothing was entered" << std::endl; + exit(EXIT_FAILURE); + } + case 1: // inputs like "1", "2" + { + by_id.push_back(int_tokens.at(0)); + break; + } + case 2: // inputs like "1-2", "11-50" + { + for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++) + { + by_id.push_back(i); + } + break; + } + default: // inputs like "1-2-3" + { + std::cerr << "ERROR: Too many '-' in str." << std::endl; + exit(EXIT_FAILURE); + } + } + } + + return by_id; +} + +std::vector<std::string> split_name_input(const std::string &str) +{ + return ::split_into_vector(str, ','); +} + +int entry(int argc, char **argv) +{ + // TODO Add new option names! + + arser::Arser arser("circle-opselector provides selecting operations in circle model"); + + arser.add_argument("--version") + .nargs(0) + .default_value(false) + .help("Show version information and exit") + .exit_with(print_version); + + // TODO Add new options! 
+ + arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); + arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); + + // select option + arser.add_argument("--by_id") + .nargs(1) + .type(arser::DataType::STR) + .help("Input operation id to select nodes."); + arser.add_argument("--by_name") + .nargs(1) + .type(arser::DataType::STR) + .help("Input operation name to select nodes."); + + try + { + arser.parse(argc, argv); + } + catch (const std::runtime_error &err) + { + std::cerr << err.what() << std::endl; + std::cout << arser; + return EXIT_FAILURE; + } + + std::string input_path = arser.get<std::string>("input"); + std::string output_path = arser.get<std::string>("output"); + + std::string operator_input; + + std::vector<uint32_t> by_id; + std::vector<std::string> by_name; + + if (!arser["--by_id"] && !arser["--by_name"] || arser["--by_id"] && arser["--by_name"]) + { + std::cerr << "ERROR: Either option '--by_id' or '--by_name' must be specified" << std::endl; + std::cerr << arser; + return EXIT_FAILURE; + } + + if (arser["--by_id"]) + { + operator_input = arser.get<std::string>("--by_id"); + by_id = split_id_input(operator_input); + } + if (arser["--by_name"]) + { + operator_input = arser.get<std::string>("--by_name"); + by_name = split_name_input(operator_input); + } + + // Import original circle file. + auto module = opselector::getModule(input_path); + + // Select nodes from user input. + std::vector<const luci::CircleNode *> selected_nodes; + + // put selected nodes into vector. + if (by_id.size()) + { + loco::Graph *graph = module.get()->graph(0); // get main subgraph. 
+ + for (auto node : loco::all_nodes(graph)) + { + auto cnode = loco::must_cast<const luci::CircleNode *>(node); + + try + { + auto node_id = luci::get_node_id(cnode); // if the node is not operator, throw runtime_error + + for (auto selected_id : by_id) + if (selected_id == node_id) // find the selected id + selected_nodes.emplace_back(cnode); + } + catch (std::runtime_error) + { + continue; + } + } + } + if (by_name.size()) + { + loco::Graph *graph = module.get()->graph(0); // get main subgraph. + + for (auto node : loco::all_nodes(graph)) + { + auto cnode = loco::must_cast<const luci::CircleNode *>(node); + std::string node_name = cnode->name(); + + for (auto selected_name : by_name) + if (selected_name.compare(node_name) == 0) // find the selected name + selected_nodes.emplace_back(cnode); + } + } + if (selected_nodes.size() == 0) + { + std::cerr << "ERROR: No operator selected" << std::endl; + exit(EXIT_FAILURE); + } + // TODO implement node selections + + // Export to output Circle file + assert(opselector::exportModule(module.get(), output_path)); + + return 0; +} diff --git a/compiler/circle-opselector/requires.cmake b/compiler/circle-opselector/requires.cmake new file mode 100644 index 000000000..dcdbcbb68 --- /dev/null +++ b/compiler/circle-opselector/requires.cmake @@ -0,0 +1,6 @@ +require("foder") +require("loco") +require("safemain") +require("luci") +require("arser") +require("vconone") diff --git a/compiler/circle-opselector/src/Driver.test.cpp b/compiler/circle-opselector/src/Driver.test.cpp new file mode 100644 index 000000000..6e569085e --- /dev/null +++ b/compiler/circle-opselector/src/Driver.test.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Driver.test.h" +#include "TestHelper.h" + +#include <gtest/gtest.h> + +TEST(DriverTest, NoArg_NEG) +{ + Argv<1> argv; + argv.add("circle-opselector"); + + ::testing::internal::CaptureStderr(); + ::testing::internal::CaptureStdout(); + int result = entry(1, argv.argv()); + ::testing::internal::GetCapturedStdout(); + ASSERT_EQ(EXIT_FAILURE, result); +} + +TEST(DriverTest, Wrong_ID_NEG) +{ + std::string str1 = "1"; + std::string empty = ""; + std::string no_integer = "1531538X5"; + + ASSERT_EQ(true, is_number(str1)); + ASSERT_EQ(false, is_number(empty)); + ASSERT_EQ(false, is_number(no_integer)); +} + +TEST(DriverTest, Split) +{ + std::vector<uint32_t> vec1; + std::vector<uint32_t> vec2; + + std::string hyphen = "1-3,8-10"; + std::string comma = "1,2,3"; + + vec1.push_back(1); + vec1.push_back(2); + vec1.push_back(3); + vec1.push_back(8); + vec1.push_back(9); + vec1.push_back(10); + + vec2.push_back(1); + vec2.push_back(2); + vec2.push_back(3); + + ASSERT_EQ(vec1, split_id_input(hyphen)); + ASSERT_EQ(vec2, split_id_input(comma)); +} diff --git a/compiler/circle-opselector/src/Driver.test.h b/compiler/circle-opselector/src/Driver.test.h new file mode 100644 index 000000000..06f151649 --- /dev/null +++ b/compiler/circle-opselector/src/Driver.test.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_OPSELECTOR_DRIVER_TEST_H__ +#define __CIRCLE_OPSELECTOR_DRIVER_TEST_H__ + +#include <vector> +#include <string> + +int entry(int argc, char **argv); +bool is_number(const std::string &s); +std::vector<uint32_t> split_id_input(const std::string &str); + +#endif // __CIRCLE_OPSELECTOR_DRIVER_TEST_H__ diff --git a/compiler/circle-opselector/src/ModuleIO.cpp b/compiler/circle-opselector/src/ModuleIO.cpp new file mode 100644 index 000000000..46f45ceb0 --- /dev/null +++ b/compiler/circle-opselector/src/ModuleIO.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ModuleIO.h" + +#include <foder/FileLoader.h> + +#include <luci/Importer.h> +#include <luci/CircleExporter.h> +#include <luci/CircleFileExpContract.h> + +#include <iostream> + +namespace opselector +{ + +std::unique_ptr<luci::Module> getModule(std::string &input_path) +{ + // Load model from the file + foder::FileLoader file_loader{input_path}; + std::vector<char> model_data = file_loader.load(); + + // Verify flatbuffers + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()}; + if (!circle::VerifyModelBuffer(verifier)) + { + std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl; + exit(EXIT_FAILURE); + } + + const circle::Model *circle_model = circle::GetModel(model_data.data()); + if (circle_model == nullptr) + { + std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl; + exit(EXIT_FAILURE); + } + + // Import from input Circle file + luci::Importer importer; + + return importer.importModule(circle_model); +} + +bool exportModule(luci::Module *module, std::string &output_path) +{ + luci::CircleExporter exporter; + + luci::CircleFileExpContract contract(module, output_path); + + if (!exporter.invoke(&contract)) + { + std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl; + return false; + } + + return true; +} + +} // namespace opselector diff --git a/compiler/circle-opselector/src/ModuleIO.h b/compiler/circle-opselector/src/ModuleIO.h new file mode 100644 index 000000000..39c704bf3 --- /dev/null +++ b/compiler/circle-opselector/src/ModuleIO.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_OPSELECTOR_MODULEIO_H__ +#define __CIRCLE_OPSELECTOR_MODULEIO_H__ + +#include <luci/IR/Module.h> + +#include <string> +#include <memory> + +namespace opselector +{ + +std::unique_ptr<luci::Module> getModule(std::string &input_path); +bool exportModule(luci::Module *module, std::string &output_path); + +} // namespace opselector + +#endif // __CIRCLE_OPSELECTOR_MODULEIO_H__ diff --git a/compiler/circle-opselector/src/ModuleIO.test.cpp b/compiler/circle-opselector/src/ModuleIO.test.cpp new file mode 100644 index 000000000..a1e5c2070 --- /dev/null +++ b/compiler/circle-opselector/src/ModuleIO.test.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ModuleIO.h" + +#include <gtest/gtest.h> + +TEST(ModuleIOTest, Export_nullptr) +{ + std::string output_path = "./test.out.circle"; + + ASSERT_EQ(false, opselector::exportModule(nullptr, output_path)); +} diff --git a/compiler/circle-opselector/src/TestHelper.h b/compiler/circle-opselector/src/TestHelper.h new file mode 100644 index 000000000..966e2b219 --- /dev/null +++ b/compiler/circle-opselector/src/TestHelper.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CIRCLE_OPSELECTOR_TEST_HELPER_H__ +#define __CIRCLE_OPSELECTOR_TEST_HELPER_H__ + +#include <cassert> +#include <string.h> + +template <size_t N> class Argv +{ +public: + typedef char *pchar_t; + +public: + ~Argv() + { + for (size_t n = 0; n < _ptr; ++n) + delete _argv[n]; + } + + void add(const char *in) + { + assert(_ptr < N); + _argv[_ptr] = new char[strlen(in) + 1]; + strncpy(_argv[_ptr], in, strlen(in) + 1); + _ptr++; + } + + pchar_t *argv(void) { return _argv; } + +private: + pchar_t _argv[N] = { + nullptr, + }; + size_t _ptr = 0; +}; + +#endif // __CIRCLE_OPSELECTOR_TEST_HELPER_H__ diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt index b4b1b19db..1cfbcbd9b 100644 --- a/compiler/circle-part-value-test/CMakeLists.txt +++ b/compiler/circle-part-value-test/CMakeLists.txt @@ -106,7 +106,7 @@ add_dependencies(circle_part_value_test_prepare common_artifacts_deps) add_test(NAME circle_part_value_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh" "${CMAKE_CURRENT_BINARY_DIR}" - "${NNCC_OVERLAY_DIR}/venv_2_3_0" + "${NNCC_OVERLAY_DIR}/venv_2_6_0" "$<TARGET_FILE:circle_part_driver>" ${PARTITION_LIST} ) diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md index e1a0258dc..5fd312e33 100644 --- a/compiler/circle-partitioner/README.md +++ b/compiler/circle-partitioner/README.md @@ -49,8 +49,8 @@ DIV=acl_cl - `backends`: Existing partition group names which nodes should be placed, in CSV format. - `default`: Default group name which should be one of `backends` item. - `comply`: How to group nodes of the model. - - currently `opcode` is supported - - future work: set group by node name or sequence number. + - currently `opcode` and `opname` are supported + - future work: set group by sequence number. 
##### `[OPCODE`] section diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp index 5e717d085..1a09a8a2a 100644 --- a/compiler/circle-quantizer/src/CircleQuantizer.cpp +++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp @@ -43,6 +43,7 @@ void print_exclusive_options(void) std::cout << " --quantize_dequantize_weights" << std::endl; std::cout << " --quantize_with_minmax" << std::endl; std::cout << " --requantize" << std::endl; + std::cout << " --force_quantparam" << std::endl; } void print_version(void) @@ -63,6 +64,7 @@ int entry(int argc, char **argv) const std::string qdqw = "--quantize_dequantize_weights"; const std::string qwmm = "--quantize_with_minmax"; const std::string rq = "--requantize"; + const std::string fq = "--force_quantparam"; const std::string gpd = "--generate_profile_data"; @@ -105,6 +107,15 @@ int entry(int argc, char **argv) "Two arguments required: input_dtype(int8) " "output_dtype(uint8)"); + arser.add_argument(fq) + .nargs(3) + .type(arser::DataType::STR_VEC) + .required(false) + .accumulated(true) + .help("Write quantization parameters to the specified tensor. " + "Three arguments required: tensor_name(string), " + "scale(float) zero_point(int)"); + arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model"); arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model"); @@ -123,10 +134,11 @@ int entry(int argc, char **argv) } { - // only one of qdqw, qwmm, rq option can be used + // only one of qdqw, qwmm, rq, fq option can be used int32_t opt_used = arser[qdqw] ? 1 : 0; opt_used += arser[qwmm] ? 1 : 0; opt_used += arser[rq] ? 1 : 0; + opt_used += arser[fq] ? 
1 : 0; if (opt_used != 1) { print_exclusive_options(); @@ -185,6 +197,34 @@ int entry(int argc, char **argv) options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1)); } + if (arser[fq]) + { + auto values = arser.get<std::vector<std::vector<std::string>>>(fq); + + std::vector<std::string> tensors; + std::vector<std::string> scales; + std::vector<std::string> zero_points; + + for (auto const value : values) + { + if (value.size() != 3) + { + std::cerr << arser; + return 255; + } + + tensors.push_back(value[0]); + scales.push_back(value[1]); + zero_points.push_back(value[2]); + } + + options->enable(Algorithms::ForceQuantParam); + + options->params(AlgorithmParameters::Quantize_tensor_names, tensors); + options->params(AlgorithmParameters::Quantize_scales, scales); + options->params(AlgorithmParameters::Quantize_zero_points, zero_points); + } + std::string input_path = arser.get<std::string>("input"); std::string output_path = arser.get<std::string>("output"); diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst index 95822c758..f41aac303 100644 --- a/compiler/circle2circle-dredd-recipe-test/test.lst +++ b/compiler/circle2circle-dredd-recipe-test/test.lst @@ -41,6 +41,7 @@ Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6) Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul) Add(MatMul_000 PASS resolve_customop_matmul) Add(DepthwiseConv2D_003 PASS) +Add(PadV2_001 PASS substitute_padv2_to_pad) Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape) Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax) Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax) diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index 1998b1646..a5ddb26dc 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -98,6 +98,12 @@ int entry(int argc, char 
**argv) .default_value(false) .help("This will fold dequantize op"); + arser.add_argument("--fold_dwconv") + .nargs(0) + .required(false) + .default_value(false) + .help("This will fold Depthwise Convolution operator with constant inputs"); + arser.add_argument("--fold_sparse_to_dense") .nargs(0) .required(false) @@ -116,6 +122,12 @@ int entry(int argc, char **argv) .default_value(false) .help("This will fuse Activation function to a preceding operator"); + arser.add_argument("--fuse_add_with_fully_connected") + .nargs(0) + .required(false) + .default_value(false) + .help("This will fuse Add operator to FullyConnected operator"); + arser.add_argument("--fuse_add_with_tconv") .nargs(0) .required(false) @@ -282,6 +294,12 @@ int entry(int argc, char **argv) .default_value(false) .help("This will convert certain condition PadV2 to Pad"); + arser.add_argument("--substitute_splitv_to_split") + .nargs(0) + .required(false) + .default_value(false) + .help("This will convert certain condition SplitV to Split operator"); + arser.add_argument("--substitute_squeeze_to_reshape") .nargs(0) .required(false) @@ -300,6 +318,12 @@ int entry(int argc, char **argv) .default_value(false) .help("This will convert single input Transpose to Reshape"); + arser.add_argument("--expand_broadcast_const") + .nargs(0) + .required(false) + .default_value(false) + .help("This will expand broadcastable constant inputs"); + arser.add_argument("--convert_nchw_to_nhwc") .nargs(0) .required(false) @@ -426,6 +450,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::FoldCast); if (arser.get<bool>("--fold_dequantize")) options->enable(Algorithms::FoldDequantize); + if (arser.get<bool>("--fold_dwconv")) + options->enable(Algorithms::FoldDepthwiseConv2D); if (arser.get<bool>("--fold_sparse_to_dense")) options->enable(Algorithms::FoldSparseToDense); if (arser.get<bool>("--forward_reshape_to_unaryop")) @@ -434,6 +460,8 @@ int entry(int argc, char **argv) 
options->enable(Algorithms::FuseActivationFunction); if (arser.get<bool>("--fuse_batchnorm_with_conv")) options->enable(Algorithms::FuseBatchNormWithConv); + if (arser.get<bool>("--fuse_add_with_fully_connected")) + options->enable(Algorithms::FuseAddWithFullyConnected); if (arser.get<bool>("--fuse_add_with_tconv")) options->enable(Algorithms::FuseAddWithTConv); if (arser.get<bool>("--fuse_batchnorm_with_dwconv")) @@ -486,6 +514,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::SubstitutePackToReshape); if (arser.get<bool>("--substitute_padv2_to_pad")) options->enable(Algorithms::SubstitutePadV2ToPad); + if (arser.get<bool>("--substitute_splitv_to_split")) + options->enable(Algorithms::SubstituteSplitVToSplit); if (arser.get<bool>("--substitute_squeeze_to_reshape")) options->enable(Algorithms::SubstituteSqueezeToReshape); if (arser.get<bool>("--substitute_strided_slice_to_reshape")) @@ -496,6 +526,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::TransformMinMaxToRelu6Pass); if (arser.get<bool>("--transform_min_relu_to_relu6")) options->enable(Algorithms::TransformMinReluToRelu6Pass); + if (arser.get<bool>("--expand_broadcast_const")) + options->enable(Algorithms::ExpandBroadcastConst); if (arser.get<bool>("--mute_warnings")) settings->set(luci::UserSettings::Key::MuteWarnings, true); diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt index 8ef68370d..7848ac722 100644 --- a/compiler/circledump/CMakeLists.txt +++ b/compiler/circledump/CMakeLists.txt @@ -11,6 +11,6 @@ target_include_directories(circledump PRIVATE include) target_link_libraries(circledump arser) target_link_libraries(circledump mio_circle) target_link_libraries(circledump safemain) -target_link_libraries(circledump flatbuffers) +target_link_libraries(circledump flatbuffers-1.10) install(TARGETS circledump DESTINATION bin) diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt index 
edca29b34..6de634a25 100644 --- a/compiler/common-artifacts/CMakeLists.txt +++ b/compiler/common-artifacts/CMakeLists.txt @@ -17,6 +17,8 @@ set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2") # Create python virtual environment with tensorflow 2.3.0 set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0") +# Create python virtual environment with tensorflow 2.6.0 +set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0") add_custom_command( OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2} @@ -27,11 +29,16 @@ add_custom_command( OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0} COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0} ) +add_custom_command( + OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0} + COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0} +) # Create requirements.txt and install required pip packages set(REQUIREMENTS_FILE "requirements.txt") set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}") set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}") +set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}") # TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0' # NOTE adding version is for temporary hotfix of setuptools 50.x.y version @@ -53,8 +60,23 @@ add_custom_command( DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0} ) +add_custom_command( + OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} + COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0 + COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install -r 
${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade + DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0} +) + add_custom_target(common_artifacts_python_deps ALL - DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ${VIRTUALENV_OVERLAY_TF_2_3_0} ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} + DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} + ${VIRTUALENV_OVERLAY_TF_2_3_0} + ${VIRTUALENV_OVERLAY_TF_2_6_0} + ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} + ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} + ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} ) #[[ Generate common resources ]] diff --git a/compiler/enco/frontend/tflite/CMakeLists.txt b/compiler/enco/frontend/tflite/CMakeLists.txt index ea10fbc4b..b2de2b34b 100644 --- a/compiler/enco/frontend/tflite/CMakeLists.txt +++ b/compiler/enco/frontend/tflite/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers QUIET) +nnas_find_package(FlatBuffers EXACT 1.10 QUIET) if(NOT FlatBuffers_FOUND) return() @@ -17,7 +17,7 @@ add_library(enco_tflite_frontend SHARED ${SOURCES}) target_include_directories(enco_tflite_frontend PRIVATE src) target_link_libraries(enco_tflite_frontend enco_intf_frontend) target_link_libraries(enco_tflite_frontend enco_intf_cmdline) -target_link_libraries(enco_tflite_frontend flatbuffers) +target_link_libraries(enco_tflite_frontend flatbuffers-1.10) target_link_libraries(enco_tflite_frontend enco_tflite_schema) target_link_libraries(enco_tflite_frontend morph) target_link_libraries(enco_tflite_frontend cwrap) diff --git a/compiler/exo/CMakeLists.txt b/compiler/exo/CMakeLists.txt index e686cbb83..9d02f7cba 100644 --- a/compiler/exo/CMakeLists.txt +++ b/compiler/exo/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers QUIET) +nnas_find_package(FlatBuffers EXACT 1.10 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "Build exo: FALSE (missing FlatBuffers)") diff --git a/compiler/luci-interpreter/CMakeLists.txt b/compiler/luci-interpreter/CMakeLists.txt index ab4ec1f43..1f7acee87 100644 --- 
a/compiler/luci-interpreter/CMakeLists.txt +++ b/compiler/luci-interpreter/CMakeLists.txt @@ -4,4 +4,12 @@ if (NOT LUCI_INTERPRETER_PAL_DIR) set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux") endif() +set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst) + +if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX) + set(LUCI_INTERPRETER_SUFFIX "") +else() + set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX}) +endif() + add_subdirectory(src) diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md new file mode 100644 index 000000000..4a9a34e6d --- /dev/null +++ b/compiler/luci-interpreter/README.md @@ -0,0 +1,158 @@ +# luci-interpreter + +`luci-interpreter` is an inference engine for neural networks represented in luci IR. +See `compiler/luci/lang` directory for details about IR. +You can find useful infrastructure, like importer/exporter, optimizations in `compiler/luci`. + +`luci-interpreter` provides: +- Basic inference functionality, input setters and output getters +- Interface for inspecting hidden interpreter state, like activation values during inference +- Customization mechanisms to fit the interpreter to specific platforms, like MCUs + +Public interface headers are placed in `luci-interpreter/include/luci_interpreter` directory + +## Basic usage + +Minimal usage includes: +- Setting input data +- Running inference +- Fetching inference results + +Interpreter object is reusable and can run multiple inferences. +Elements in tensors (input/output/internal) are stored contiguously and have C-like layout: +This means for tensor t=[[0, 1],[2, 3]], t[0,1] == 1. + +Input and output tensors have the same indexes as in original luci model. 
+ +**Usage example:** +``` c++ +// Note getTensorSize is a function that computes tensor size, +// it is not part of interpreter and should be implemented by user + +luci_interpreter::Interpreter interpreter(luci_module); + +// Set inputs +// assuming model has only one input and one output +const auto input_nodes = loco::input_nodes(module->graph()); + +const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]); +std::vector<char> input_data(getTensorSize(input_node)); +// Initialize input data here + +interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + +// Start inference +interpreter.interpret(); + +// Fetch inference results +const auto output_nodes = loco::output_nodes(module->graph()); +const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]); +std::vector<char> output_data(getTensorSize(output_node)); +interpreter.readOutputTensor(output_node, output_data.data(), output_data.size()); +``` + +## Inspecting intermediate state + +Interpreter provides interfaces to investigate internal state of interpreter during inference. + +This is done by "observer" mechanism: +- `Interpreter` class has `attachObserver` method, which takes pointer to `ExecutionObserver` object +- `ExecutionObserver` defines several callback methods user can override to inject custom code + +ExecutionObserver provides three callbacks: +- `postTensorWrite` checks contents of output tensor after operation execution +- `preOperatorExecute` notifies that interpreter is going to execute operation +- `postOperatorExecute` notifies that interpreter has finished execution of an operation + +See `luci-interpreter/include/luci_interpreter/Interpreter.h` for this interface details. 
+ +**Usage example:** +``` c++ +class CustomExecutionObserver: public luci_interpreter::ExecutionObserver +{ +public: + void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override + { + if (tensor->element_type() != loco::DataType::FLOAT32) + return; + for (int i = 0; i < tensor->shape().num_elements(); ++i) + std::cout << tensor->data<float>()[i] << ", "; + } + + // User observer can override only needed methods, + // others will inherit empty implementation from base observer. + + // void preOperatorExecute(const luci::CircleNode *node); + // void postOperatorExecute(const luci::CircleNode *node); +}; + +luci_interpreter::Interpreter interpreter(module); +CustomExecutionObserver observer; +interpreter.attachObserver(&observer); + +// initialize input_data +interpreter.writeInputTensor(input_node, input_data.data(), input_data.size()); + +interpreter.interpret(); +``` + +## Customizing inference + +### Memory manager + +Interpreter provides a handle for altering default memory management mechanisms. + +This is done by `MemoryManager` interface, see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details. + +This header contains `IMemoryManager` abstract class which is responsible for allocation and deallocation of tensors' memory. + +User can construct an interpreter with one of predefined memory managers or their own custom memory manager. +Note that one memory manager could be shared between multiple interpreter instances, because an interpreter does not own the manager object. + +List of predefined memory managers: +- `SimpleMemoryManager` This is a simple wrapper around new/delete, default one. +- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests. +- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete. +- `StaticMemoryManager` Uses precomputed memory allocation plan. 
Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs). + +**SimpleMemoryManager usage example:** + +No need to select anything to use this memory manager. +``` c++ +luci_interpreter::Interpreter interpreter(module); +``` + +**TestMemoryManager usage example:** + +``` c++ +luci_interpreter::TestMemoryManager mm; +luci_interpreter::Interpreter interpreter(module, &mm); +``` + +**BuddyMemoryManager usage example:** + +`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation. + +This allocator uses an external buffer as a memory pool. That allows static memory arrays to be used for allocations. + +Limitations +- Current implementation uses only lower power-of-two bytes of given buffer. + + For example, for a 1000-byte buffer, only the lower 512 bytes will be used. +- Current implementation can handle a memory pool of at most 4 gigabytes + +``` c++ + constexpr int buffer_size = 2048; + static uint8_t buffer[buffer_size]; + luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size); + luci_interpreter::Interpreter interpreter(module.get(), &memory_manager); +``` + +**StaticMemoryManager usage example:** +``` c++ +TBD when it is merged +``` + +## Further reading + +If you want to participate in development, please read `DEVELOPER.md` for SW architecture details. diff --git a/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h new file mode 100644 index 000000000..205baa626 --- /dev/null +++ b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h @@ -0,0 +1,144 @@ +/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/MemoryManager.h" + +#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H + +namespace luci_interpreter +{ + +class BuddyMemoryManager : public IMemoryManager +{ +public: + BuddyMemoryManager(uint8_t *memory_start, int32_t memSize); + + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + +private: + struct Block + { + Block *next_free; + bool is_free; + uint32_t size; + // debug field + Block *self; + }; + + Block *_start_block; + int32_t _num_blocks; + uint32_t _size; + Block *_free_blocks[32]{}; + + static int32_t lowerLog2(uint32_t val) + { + int32_t i = 0; + while (val >>= 1) + i++; + + return i; + } + + void addToBlocks(Block *block, int32_t l) + { + if (!block) + return; + + block->next_free = _free_blocks[l]; + _free_blocks[l] = block; + } + + void removeFromBlocks(const Block *block, int32_t l) + { + if (!block) + return; + + Block *tmp = _free_blocks[l]; + + if (block == tmp) + { + _free_blocks[l] = block->next_free; + return; + } + + while (tmp) + { + if (tmp->next_free == block) + { + tmp->next_free = block->next_free; + return; + } + + tmp = tmp->next_free; + } + } + + void divideBlock(Block *block, int32_t l) + { + int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block); + + removeFromBlocks(block, l); + + // there is no need to add to the free_blocks list here + block->is_free = true; + block->size = size; + block->self = block; + + Block *buddy; + buddy = (Block 
*)((uint8_t *)block + sizeof(Block) + size); + buddy->is_free = true; + buddy->size = size; + buddy->self = buddy; + + addToBlocks(buddy, l - 1); + } + + Block *mergeBlock(Block *block) + { + Block *buddy; + + const int32_t l = lowerLog2(block->size + sizeof(Block)); + + const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block); + buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block); + + if (!buddy->is_free || buddy->size != block->size) + return nullptr; + + if (block > buddy) + { + Block *x = block; + block = buddy; + buddy = x; + } + + removeFromBlocks(block, l); + removeFromBlocks(buddy, l); + + block->size = block->size * 2 + sizeof(Block); + block->is_free = true; + block->self = block; + + addToBlocks(block, l + 1); + + return block; + } +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h index 7a14bf6f8..7dee8a7f2 100644 --- a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h +++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h @@ -22,6 +22,7 @@ #include <luci/IR/Nodes/CircleInput.h> #include <luci/IR/Nodes/CircleOutput.h> +#include "luci_interpreter/MemoryManager.h" #include <luci/IR/Module.h> #include <memory> @@ -49,7 +50,7 @@ public: class Interpreter { public: - explicit Interpreter(const luci::Module *module); + explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr); ~Interpreter(); @@ -64,7 +65,11 @@ public: const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; } private: + // _default_memory_manager should be before _runtime_module due to + // the order of deletion in the destructor + std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr; std::unique_ptr<class RuntimeModule> _runtime_module; + IMemoryManager *_memory_manager = nullptr; // 
Observer functionality support. std::unique_ptr<struct RuntimeToIR> _runtime_to_ir; diff --git a/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h new file mode 100644 index 000000000..f32c52095 --- /dev/null +++ b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_MEMORY_MANAGER_H + +#include "luci_interpreter/core/DataType.h" +#include "luci_interpreter/core/Tensor.h" + +namespace luci_interpreter +{ + +class IMemoryManager +{ +public: + virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0; + virtual void release_memory(luci_interpreter::Tensor &tensor) = 0; + + virtual ~IMemoryManager() = default; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H diff --git a/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h new file mode 100644 index 000000000..658a1c609 --- /dev/null +++ b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ + +class SimpleMemoryManager : public IMemoryManager +{ +public: + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H diff --git a/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h new file mode 100644 index 000000000..ded7bde79 --- /dev/null +++ b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ + +// Used for allocations in static buffer, using offsets defined in luci model. +class StaticMemoryManager : public IMemoryManager +{ +public: + StaticMemoryManager() = delete; + + explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr) + { /* Do nothing */ + } + + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + +private: + // Stores a pointer to the beginning of the allocated memory buffer. + uint8_t *_buffer_ptr; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H diff --git a/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h new file mode 100644 index 000000000..397bbed76 --- /dev/null +++ b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H +#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H + +#include "luci_interpreter/MemoryManager.h" + +namespace luci_interpreter +{ +// Memory Manager for using in kernels tests. This eliminates the need to manually delete the +// allocated memory in tests. This mem_manager remembers all its allocations and in destructor +// delete all allocations. +class TestMemoryManager : public IMemoryManager +{ +public: + void allocate_memory(luci_interpreter::Tensor &tensor) final; + void release_memory(luci_interpreter::Tensor &tensor) final; + + ~TestMemoryManager() override + { + for (auto allocation : allocations) + { + delete[] allocation; + } + } + +private: + std::vector<uint8_t *> allocations; +}; + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h index e356bce92..bb9ff6d4a 100644 --- a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h +++ b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h @@ -107,9 +107,6 @@ public: return _quantization.zero_point[0]; } - void allocate(); - void deallocate(); - const std::vector<float> &scales() const { return _quantization.scale; } const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; } @@ -118,15 +115,16 @@ public: template <typename T> const T *data() const { - assert(_data_allocated); - return reinterpret_cast<const T *>(_data.get()); + static_assert(std::is_same<uint8_t, char>::value or + std::is_same<uint8_t, unsigned char>::value); + return reinterpret_cast<const T *>(_data); } template <typename T> T *data() { - if (!_data_allocated) - allocate(); - return reinterpret_cast<T *>(_data.get()); + static_assert(std::is_same<uint8_t, char>::value or + std::is_same<uint8_t, unsigned char>::value); + return reinterpret_cast<T *>(_data); } const 
std::string &name() const { return _name; } @@ -137,13 +135,50 @@ public: void resize(const Shape &new_shape); + void set_data_buffer(uint8_t *buffer) + { + if (buffer == nullptr) + { + _data_allocated = false; + } + else + { + _data_allocated = true; + } + _data = buffer; + } + + bool is_observable() const { return _is_observable; } + + void set_observable(bool value) { _is_observable = value; } + + bool is_allocatable() const { return _is_allocatable; } + + void set_allocatable(bool value) { _is_allocatable = value; } + + bool is_data_allocated() const { return _data_allocated; } + + int32_t get_offset() const { return _offset; } + + void set_offset(int32_t offset) { _offset = offset; } + private: DataType _element_type; Shape _shape; AffineQuantization _quantization; - std::unique_ptr<uint8_t[]> _data; + uint8_t *_data; std::string _name; bool _data_allocated; + // Write of tensor is reported to registered Observers only if this tensor is observable + // This is needed for tensors used in kernel implementation, but not present in original model. + bool _is_observable = true; + // Memory manager is called for tensor only if it is "allocatable". + // Kernel configuration could disable allocation of some tensors if they are not needed for + // particular operation. + bool _is_allocatable = true; + // Used by static memory manager. + // Stores the offset from the beginning of the allocated memory buffer. 
+ int32_t _offset = -1; }; } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst new file mode 100644 index 000000000..9d541276c --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst @@ -0,0 +1,68 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LocalResponseNormalization) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(LogSoftmax) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Mean) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pack) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(Pow) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Relu) +REGISTER_KERNEL(Relu6) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(ReverseV2) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Slice) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(Split) +REGISTER_KERNEL(SplitV) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) 
+REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) +REGISTER_KERNEL(Unpack) +REGISTER_KERNEL(While) diff --git a/compiler/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-interpreter/pal/linux/PALArgMax.h new file mode 100644 index 000000000..21e63296d --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h> + +namespace luci_interpreter_pal +{ +template <typename T1, typename T2, typename T3> +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater<T1> cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h new file mode 100644 index 000000000..3fe2022ed --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h new file mode 100644 index 000000000..2550dd5d7 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h> + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &im2col_shape, + float *im2col_data) +{ + if (im2col_data) + { + tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, im2col_shape, + im2col_data); + } + else + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &im2col_shape, + uint8 *im2col_data) +{ + // TODO This should only be done once (although it takes only a few microseconds). + // Also, the user should be able to adjust the number of threads. 
+ auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>(); + gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency())); + + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, im2col_shape, + im2col_data, gemmlowp_context.get()); +} + +static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &im2col_shape, + int8 *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + // TODO enable optimized version + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h new file mode 100644 index 000000000..f9ebfcfb5 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-interpreter/pal/linux/PALElu.h new file mode 100644 index 000000000..cb365ffd0 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALElu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h new file mode 100644 index 000000000..6c663e21f --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h new file mode 100644 index 000000000..aac57f2b2 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h new file mode 100644 index 000000000..e8209bae6 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams &params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h new file mode 100644 index 000000000..54f7f0916 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H +#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void +LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H diff --git a/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h new file mode 100644 index 000000000..a32e3eec6 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H +#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta); +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + // Do nothing for linux + (void)params; + (void)input_scale; + (void)beta; +} + +static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale, + const tflite::RuntimeShape &input_shape, const uint8 *input_data, + const tflite::RuntimeShape &output_shape, uint8 *output_data) +{ + tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h new file mode 100644 index 000000000..cfaec1b58 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape, + const float *input1_data, const tflite::RuntimeShape &input2_shape, + const float *input2_data, const tflite::RuntimeShape &output_shape, + float *output_data) +{ + tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} + +static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params, + const tflite::RuntimeShape &input1_shape, + const float *input1_data, + const tflite::RuntimeShape &input2_shape, + const float *input2_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-interpreter/pal/linux/PALNeg.h new file mode 100644 index 000000000..797ffee1b --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-interpreter/pal/linux/PALRelu.h new file mode 100644 index 000000000..b4c715d3e --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALRelu.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RELU_H +#define LUCI_INTERPRETER_PAL_RELU_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data); +} + +template <typename T> +static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RELU_H diff --git a/compiler/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-interpreter/pal/linux/PALRelu6.h new file mode 100644 index 000000000..bf2f91aa5 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALRelu6.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RELU6_H +#define LUCI_INTERPRETER_PAL_RELU6_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data); +} + +template <typename T> +static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RELU6_H diff --git a/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h new file mode 100644 index 000000000..7380081dc --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h new file mode 100644 index 000000000..74d19265b --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-interpreter/pal/linux/PALSlice.h new file mode 100644 index 000000000..640a71684 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSlice.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SLICE_H +#define LUCI_INTERPRETER_PAL_SLICE_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Slice(const tflite::SliceParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SLICE_H diff --git a/compiler/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-interpreter/pal/linux/PALSoftmax.h new file mode 100644 index 000000000..b197e79d1 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSoftmax.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta); +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + // Do nothing for linux + (void)params; + (void)input_scale; + (void)beta; +} + +template <typename In, typename Out> +static inline void Softmax(const tflite::SoftmaxParams &params, + const tflite::RuntimeShape &input_shape, const In *input_data, + const tflite::RuntimeShape &output_shape, Out *output_data) +{ + tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h new file mode 100644 index 000000000..5e8de9ba3 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams &params, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h new file mode 100644 index 000000000..52d2a5bb1 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-interpreter/pal/linux/PALSplit.h new file mode 100644 index 000000000..4d8da72d8 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSplit.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SPLIT_H +#define LUCI_INTERPRETER_PAL_SPLIT_H + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename Scalar> +static inline void Split(const tflite::SplitParams ¶ms, const tflite::RuntimeShape &input_shape, + const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes, + Scalar *const *output_data) +{ + tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPLIT_H diff --git a/compiler/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-interpreter/pal/linux/PALSub.h new file mode 100644 index 000000000..04080d619 --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Sub(const tflite::ArithmeticParams ¶ms, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-interpreter/pal/linux/pal.cmake index da880c64c..84349e0bf 100644 --- a/compiler/luci-interpreter/pal/linux/pal.cmake +++ b/compiler/luci-interpreter/pal/linux/pal.cmake @@ -1,8 +1,8 @@ macro(initialize_pal) - nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET) - nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET) - nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET) - nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) if (NOT TensorFlowSource_FOUND) message(STATUS "Skipping luci-interpreter: TensorFlow not found") @@ -43,7 +43,12 @@ macro(add_pal_to_target TGT) set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES}) set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE "${TensorFlowSource_DIR}") + target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE 
+ "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal) endmacro() diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst new file mode 100644 index 000000000..771974afe --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst @@ -0,0 +1,56 @@ +REGISTER_KERNEL(Add) +REGISTER_KERNEL(ArgMax) +REGISTER_KERNEL(AveragePool2D) +REGISTER_KERNEL(BatchToSpaceND) +REGISTER_KERNEL(Cast) +REGISTER_KERNEL(Concatenation) +REGISTER_KERNEL(Conv2D) +REGISTER_KERNEL(DepthToSpace) +REGISTER_KERNEL(DepthwiseConv2D) +REGISTER_KERNEL(Div) +REGISTER_KERNEL(Elu) +REGISTER_KERNEL(Exp) +REGISTER_KERNEL(Floor) +REGISTER_KERNEL(FloorDiv) +REGISTER_KERNEL(Equal) +REGISTER_KERNEL(FullyConnected) +REGISTER_KERNEL(Greater) +REGISTER_KERNEL(GreaterEqual) +REGISTER_KERNEL(If) +REGISTER_KERNEL(InstanceNorm) +REGISTER_KERNEL(L2Normalize) +REGISTER_KERNEL(L2Pool2D) +REGISTER_KERNEL(LeakyRelu) +REGISTER_KERNEL(Less) +REGISTER_KERNEL(LessEqual) +REGISTER_KERNEL(LogicalAnd) +REGISTER_KERNEL(LogicalNot) +REGISTER_KERNEL(LogicalOr) +REGISTER_KERNEL(Logistic) +REGISTER_KERNEL(Maximum) +REGISTER_KERNEL(MaxPool2D) +REGISTER_KERNEL(Minimum) +REGISTER_KERNEL(MirrorPad) +REGISTER_KERNEL(Mul) +REGISTER_KERNEL(Neg) +REGISTER_KERNEL(NotEqual) +REGISTER_KERNEL(Pad) +REGISTER_KERNEL(PadV2) +REGISTER_KERNEL(PRelu) +REGISTER_KERNEL(Reshape) +REGISTER_KERNEL(ResizeBilinear) +REGISTER_KERNEL(ResizeNearestNeighbor) +REGISTER_KERNEL(Rsqrt) +REGISTER_KERNEL(Softmax) +REGISTER_KERNEL(SpaceToBatchND) +REGISTER_KERNEL(SpaceToDepth) +REGISTER_KERNEL(StridedSlice) +REGISTER_KERNEL(Sqrt) +REGISTER_KERNEL(Square) +REGISTER_KERNEL(SquaredDifference) +REGISTER_KERNEL(Squeeze) +REGISTER_KERNEL(Sub) +REGISTER_KERNEL(Tanh) +REGISTER_KERNEL(Transpose) +REGISTER_KERNEL(TransposeConv) 
+REGISTER_KERNEL(While) diff --git a/compiler/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-interpreter/pal/mcu/PALArgMax.h new file mode 100644 index 000000000..21e63296d --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALArgMax.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h> + +namespace luci_interpreter_pal +{ +template <typename T1, typename T2, typename T3> +static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data, + const T2 *axis, const tflite::RuntimeShape &output_shape, + T3 *output_data, const std::greater<T1> cmp) +{ + tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ARGMAX_H diff --git a/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h new file mode 100644 index 000000000..4dd77ffdc --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H +#define LUCI_INTERPRETER_PAL_ARGMAX_H + +#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::BatchToSpaceND( + unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, crops_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h new file mode 100644 index 000000000..0a8ae4e48 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALConv2d.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_CONV2D_H +#define LUCI_INTERPRETER_PAL_CONV2D_H + +#include <tensorflow/lite/kernels/internal/reference/conv.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h> + +namespace luci_interpreter_pal +{ +static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, + const float *input_data, const tflite::RuntimeShape &filter_shape, + const float *filter_data, const tflite::RuntimeShape &bias_shape, + const float *bias_data, const tflite::RuntimeShape &output_shape, + float *output_data, const tflite::RuntimeShape &im2col_shape, + float *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, + tflite::RuntimeShape(), nullptr); +} + +static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, + const uint8 *input_data, const tflite::RuntimeShape &filter_shape, + const uint8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + uint8 *output_data, const tflite::RuntimeShape &im2col_shape, + uint8 *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data, im2col_shape, + im2col_data, nullptr); +} + +static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t 
*mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const int8 *input_data, const tflite::RuntimeShape &filter_shape, + const int8 *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + int8 *output_data, const tflite::RuntimeShape &im2col_shape, + int8 *im2col_data) +{ + (void)im2col_shape; + (void)im2col_data; + tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data, + filter_shape, filter_data, bias_shape, bias_data, + output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_CONV2D_H diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h new file mode 100644 index 000000000..8463e571e --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H +#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H + +#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H diff --git a/compiler/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-interpreter/pal/mcu/PALElu.h new file mode 100644 index 000000000..4089d0a0c --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALElu.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_ELU_H +#define LUCI_INTERPRETER_PAL_ELU_H + +#include <tensorflow/lite/kernels/internal/reference/elu.h> + +namespace luci_interpreter_pal +{ + +static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data); +} + +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_ELU_H diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h new file mode 100644 index 000000000..f84742a44 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H +#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H + +#include <tensorflow/lite/kernels/internal/reference/l2normalization.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Normalization(const tflite::L2NormalizationParams &op_params, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape, + output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h new file mode 100644 index 000000000..38a302fc6 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H +#define LUCI_INTERPRETER_PAL_L2POOL2D_H + +#include <tensorflow/lite/kernels/internal/reference/pooling.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void L2Pool(const tflite::PoolParams ¶ms, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &output_shape, + T *output_data) +{ + tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H diff --git a/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h new file mode 100644 index 000000000..9ccd2224f --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H +#define LUCI_INTERPRETER_PAL_LEAKYRELU_H + +#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h> + +namespace luci_interpreter_pal +{ +static inline void LeakyRelu(const tflite::LeakyReluParams ¶ms, + const tflite::RuntimeShape &input_shape, const float *input_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h new file mode 100644 index 000000000..2b46b100c --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALMul.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_MUL_H +#define LUCI_INTERPRETER_PAL_MUL_H + +#include <tensorflow/lite/kernels/internal/reference/mul.h> + +namespace luci_interpreter_pal +{ +static inline void Mul(tflite::ArithmeticParams ¶ms, const tflite::RuntimeShape &input1_shape, + const float *input1_data, const tflite::RuntimeShape &input2_shape, + const float *input2_data, const tflite::RuntimeShape &output_shape, + float *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} + +static inline void BroadcastMul4DSlow(tflite::ArithmeticParams ¶ms, + const tflite::RuntimeShape &input1_shape, + const float *input1_data, + const tflite::RuntimeShape &input2_shape, + const float *input2_data, + const tflite::RuntimeShape &output_shape, float *output_data) +{ + tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_MUL_H diff --git a/compiler/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-interpreter/pal/mcu/PALNeg.h new file mode 100644 index 000000000..be5903a0c --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALNeg.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_NEG_H +#define LUCI_INTERPRETER_PAL_NEG_H + +#include <tensorflow/lite/kernels/internal/reference/neg.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_NEG_H diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h new file mode 100644 index 000000000..cc9f0fd54 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H + +#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeBilinear(const tflite::ResizeBilinearParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h new file mode 100644 index 000000000..f4d5a6ed3 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H + +#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, const T *input_data, + const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data, + output_size_shape, output_size_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h new file mode 100644 index 000000000..9838b542d --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H +#define LUCI_INTERPRETER_PAL_SOFTMAX_H + +#include <tensorflow/lite/kernels/internal/reference/softmax.h> + +namespace luci_interpreter_pal +{ +static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale, + float beta) +{ + // Do nothing for mcu + (void)data; + (void)input_scale; + (void)beta; +} + +static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta) +{ + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, &input_beta_left_shift); + + params->input_multiplier = input_beta_multiplier; + params->input_left_shift = input_beta_left_shift; + params->diff_min = + -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift); +} + +template <typename T> +static inline void Softmax(const tflite::SoftmaxParams ¶ms, + const tflite::RuntimeShape &input_shape, const T *input_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + // MARK: At this moment this operation doesn't support on mcu + assert(false && "Softmax NYI"); + (void)params; + (void)input_shape; + (void)input_data; + (void)output_shape; + (void)output_data; +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h new file mode 100644 index 000000000..fdddaa929 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H +#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H + +#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void +SpaceToBatchND(const tflite::SpaceToBatchParams ¶ms, + const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data, + const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToBatchND( + params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data, + unextended_input3_shape, paddings_data, unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h new file mode 100644 index 000000000..816b7f663 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H +#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H + +#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params, + const tflite::RuntimeShape &unextended_input_shape, + const T *input_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data) +{ + tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data, + unextended_output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H diff --git a/compiler/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-interpreter/pal/mcu/PALSub.h new file mode 100644 index 000000000..ea57578c6 --- /dev/null +++ b/compiler/luci-interpreter/pal/mcu/PALSub.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_PAL_SUB_H +#define LUCI_INTERPRETER_PAL_SUB_H + +#include <tensorflow/lite/kernels/internal/reference/sub.h> + +namespace luci_interpreter_pal +{ +template <typename T> +static inline void Sub(const tflite::ArithmeticParams &params, + const tflite::RuntimeShape &input1_shape, const T *input1_data, + const tflite::RuntimeShape &input2_shape, const T *input2_data, + const tflite::RuntimeShape &output_shape, T *output_data) +{ + tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data, + output_shape, output_data); +} +} // namespace luci_interpreter_pal + +#endif // LUCI_INTERPRETER_PAL_SUB_H diff --git a/compiler/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-interpreter/pal/mcu/pal.cmake index 2307ac727..a479d407b 100644 --- a/compiler/luci-interpreter/pal/mcu/pal.cmake +++ b/compiler/luci-interpreter/pal/mcu/pal.cmake @@ -1,8 +1,8 @@ macro(initialize_pal) - nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET) - nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET) - nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET) - nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET) + nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET) + nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET) if (NOT TensorFlowSource_FOUND) message(STATUS "Skipping luci-interpreter: TensorFlow not found") @@ -30,7 +30,7 @@ endmacro() macro(add_pal_to_target TGT) target_include_directories(${TGT} PRIVATE "${PAL}") - target_include_directories(${TGT} SYSTEM PRIVATE + target_include_directories(${TGT} PRIVATE "${TensorFlowRuySource_DIR}" "${TensorFlowGEMMLowpSource_DIR}" "${TensorFlowEigenSource_DIR}" @@ -42,7 +42,12 @@ macro(add_pal_to_target TGT) set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) add_library(luci_interpreter_mcu_pal STATIC 
${PAL_SOURCES}) set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_include_directories(luci_interpreter_mcu_pal SYSTEM PRIVATE "${TensorFlowSource_DIR}") + target_include_directories(luci_interpreter_mcu_pal PRIVATE + "${TensorFlowRuySource_DIR}" + "${TensorFlowGEMMLowpSource_DIR}" + "${TensorFlowEigenSource_DIR}" + "${TensorFlowSource_DIR}" + ) target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal) #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal) diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp new file mode 100644 index 000000000..6ad1f320c --- /dev/null +++ b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/BuddyMemoryManager.h" + +namespace luci_interpreter +{ + +BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize) +{ + int32_t p = lowerLog2(memSize); + + // We assume that the requested size of memory does not exceed 4 GB + assert(p < 32); + memSize = 1 << p; + + _start_block = reinterpret_cast<Block *>(memory_start); + _start_block->size = memSize - sizeof(Block); + _start_block->is_free = true; + _start_block->self = _start_block; + _num_blocks = 0; + _size = _start_block->size; + + for (auto &_free_block : _free_blocks) + _free_block = nullptr; + + addToBlocks(_start_block, p); +} + +void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + const size_t element_size = getDataTypeSize(tensor.element_type()); + const int32_t num_elements = tensor.shape().num_elements(); + auto size = num_elements * element_size; + auto footprint = size + sizeof(Block); + auto l = (footprint & (footprint - 1)) == 0 + ? lowerLog2(footprint) + : lowerLog2(footprint) + 1; // check footprint is pow_of_2 + + while (l < 32 && !_free_blocks[l]) + l++; + + assert(l < 32); + + Block *tmp; + tmp = _free_blocks[l]; + removeFromBlocks(tmp, l); + + while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block)) + { + divideBlock(tmp, l); + l--; + } + + tmp->is_free = false; + tmp->self = tmp; + _num_blocks++; + + auto *data = (uint8_t *)(tmp + 1); + tensor.set_data_buffer(data); +} + +void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + auto data = tensor.data<void>(); + auto *tmp = (Block *)((uint8_t *)data - sizeof(Block)); + + assert(tmp->self == tmp); + + tmp->is_free = true; + addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block))); + + while (tmp) + if (tmp->size == _size) + break; + else + tmp = mergeBlock(tmp); + + _num_blocks--; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp 
b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp new file mode 100644 index 000000000..29fb767b7 --- /dev/null +++ b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci_interpreter/BuddyMemoryManager.h" +#include <gtest/gtest.h> + +namespace luci_interpreter +{ +namespace +{ + +using namespace testing; + +TEST(BuddyMemoryManager, basic) +{ + auto mem_pool = std::make_unique<uint8_t[]>(200); + auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130); + Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor"); + + buddy_memory_manager->allocate_memory(first_tensor); + + uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + + first_tensor.writeData(data_1, 8); + uint8_t array_1[8]; + first_tensor.readData(array_1, 8); + for (int i = 0; i < 8; i++) + { + EXPECT_EQ(data_1[i], array_1[i]); + } + + Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor"); + buddy_memory_manager->allocate_memory(second_tensor); + + uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}}; + second_tensor.writeData(data_2, 10); + + uint8_t array_2[2][5]; + second_tensor.readData(array_2, 10); + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 5; j++) + { + EXPECT_EQ(data_2[i][j], array_2[i][j]); + } + } + + 
buddy_memory_manager->release_memory(first_tensor); + EXPECT_EQ(first_tensor.data<void>(), nullptr); + + buddy_memory_manager->release_memory(second_tensor); + EXPECT_EQ(second_tensor.data<void>(), nullptr); +} + +} // namespace +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt index 6f34b6117..e37150336 100644 --- a/compiler/luci-interpreter/src/CMakeLists.txt +++ b/compiler/luci-interpreter/src/CMakeLists.txt @@ -1,13 +1,19 @@ -include(${LUCI_INTERPRETER_PAL_DIR}/pal.cmake) +include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake") initialize_pal() if (NOT PAL_INITIALIZED) + message("PAL Failed to initialize, skip luci-interpreter") return() endif() message(STATUS "LUCI INTERPRETER BEGIN") +set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}") +set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}") + add_subdirectory(core) message(STATUS "LUCI INTERPRETER CORE") add_subdirectory(kernels) @@ -19,15 +25,34 @@ message(STATUS "LUCI INTERPTER INITALIZED") set(SOURCES "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h" - Interpreter.cpp) - -add_library(luci_interpreter SHARED ${SOURCES}) -target_include_directories(luci_interpreter PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") -target_include_directories(luci_interpreter PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}") -target_link_libraries(luci_interpreter - PUBLIC luci_lang luci_interpreter_loader luci_interpreter_core + Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp + 
"${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp) + +if (NOT LUCI_INTERPRETER_STATIC) + add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES}) +else () + add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES}) +endif () + +set(TEST_SOURCES BuddyMemoryManager.test.cpp) + +target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_BINARY} + PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE} PRIVATE nncc_common) -install(TARGETS luci_interpreter DESTINATION lib) +install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(buddy_manager_test ${TEST_SOURCES}) +target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY}) diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp index b57b691d0..1b8792a6c 100644 --- a/compiler/luci-interpreter/src/Interpreter.cpp +++ b/compiler/luci-interpreter/src/Interpreter.cpp @@ -15,6 +15,7 @@ */ #include "luci_interpreter/Interpreter.h" +#include "luci_interpreter/SimpleMemoryManager.h" #include "loader/ModuleLoader.h" @@ -69,12 +70,25 @@ private: } // namespace -Interpreter::Interpreter(const luci::Module *module) +Interpreter::Interpreter(const luci::Module *module, + luci_interpreter::IMemoryManager *memory_manager) { _runtime_to_ir = std::make_unique<RuntimeToIR>(); _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers); _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get()); - ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor); + + if (memory_manager == nullptr) + { 
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>(); + _memory_manager = _default_memory_manager.get(); + } + else + { + _memory_manager = memory_manager; + } + + ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor, + _memory_manager); loader.load(); } diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp new file mode 100644 index 000000000..230e39896 --- /dev/null +++ b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/SimpleMemoryManager.h" + +namespace luci_interpreter +{ + +void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + tensor.set_data_buffer(data); +} + +void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_data_allocated()) + { + tensor.set_data_buffer(nullptr); + return; + } + auto data = tensor.data<uint8_t>(); + delete[] data; + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-interpreter/src/StaticMemoryManager.cpp new file mode 100644 index 000000000..73a819919 --- /dev/null +++ b/compiler/luci-interpreter/src/StaticMemoryManager.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/StaticMemoryManager.h" + +namespace luci_interpreter +{ + +void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + int32_t offset = tensor.get_offset(); + assert(offset >= 0); + auto tensor_ptr = _buffer_ptr + offset; + tensor.set_data_buffer(tensor_ptr); +} + +void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-interpreter/src/TestMemoryManager.cpp new file mode 100644 index 000000000..3beeee55c --- /dev/null +++ b/compiler/luci-interpreter/src/TestMemoryManager.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ + +void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) +{ + if (!tensor.is_allocatable()) + { + return; + } + if (tensor.is_data_allocated()) + { + release_memory(tensor); + } + const auto element_size = getDataTypeSize(tensor.element_type()); + const auto num_elements = tensor.shape().num_elements(); + + auto *data = new uint8_t[num_elements * element_size]; + allocations.push_back(data); + tensor.set_data_buffer(data); +} + +void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor) +{ + tensor.set_data_buffer(nullptr); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt index e576dbd94..4430cba11 100644 --- a/compiler/luci-interpreter/src/core/CMakeLists.txt +++ b/compiler/luci-interpreter/src/core/CMakeLists.txt @@ -9,9 +9,9 @@ set(SOURCES RuntimeModule.h Tensor.cpp) -add_library(luci_interpreter_core STATIC ${SOURCES}) -set_target_properties(luci_interpreter_core PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") -target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") -target_link_libraries(luci_interpreter_core PUBLIC luci_lang) -target_link_libraries(luci_interpreter_core PRIVATE nncc_common) +add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES}) +set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}") +target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") +target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang) +target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common) diff --git a/compiler/luci-interpreter/src/core/Kernel.h 
b/compiler/luci-interpreter/src/core/Kernel.h index 5cdb2e360..a7c4a4218 100644 --- a/compiler/luci-interpreter/src/core/Kernel.h +++ b/compiler/luci-interpreter/src/core/Kernel.h @@ -36,8 +36,8 @@ protected: public: virtual ~Kernel() = default; - std::vector<const Tensor *> getInputTensors() const { return _inputs; } - std::vector<Tensor *> getOutputTensors() const { return _outputs; } + const std::vector<const Tensor *> &getInputTensors() const { return _inputs; } + const std::vector<Tensor *> &getOutputTensors() const { return _outputs; } // Configures the kernel. // This function is currently called once for each kernel during interpreter construction, diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp index fb0ad304b..c2f8d2ea8 100644 --- a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp +++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp @@ -29,8 +29,10 @@ class RuntimeGraph::TensorAllocPlan std::vector<std::vector<Tensor *>> _alloc_plan; std::vector<std::vector<Tensor *>> _dealloc_plan; bool _valid = false; + IMemoryManager *_memory_manager; public: + explicit TensorAllocPlan(IMemoryManager *memory_manager); void invalidate() { _valid = false; } bool isValid() const { return _valid; } void build(const RuntimeGraph &graph); @@ -38,6 +40,11 @@ public: void deallocate(size_t kernel_index) const; }; +RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager) + : _memory_manager(memory_manager) +{ +} + void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph) { invalidate(); @@ -80,7 +87,7 @@ void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const assert(_valid && kernel_index < _alloc_plan.size()); for (Tensor *tensor : _alloc_plan[kernel_index]) { - tensor->allocate(); + _memory_manager->allocate_memory(*tensor); } } @@ -89,16 +96,24 @@ void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const assert(_valid && kernel_index 
< _dealloc_plan.size()); for (Tensor *tensor : _dealloc_plan[kernel_index]) { - tensor->deallocate(); + _memory_manager->release_memory(*tensor); } } -RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module) - : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>()) +RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager) + : _owning_module(owning_module), _memory_manager(memory_manager), + _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager)) { } -RuntimeGraph::~RuntimeGraph() {} +RuntimeGraph::~RuntimeGraph() +{ + for (auto &tensor : _tensors) + { + if (tensor->is_data_allocated()) + _memory_manager->release_memory(*tensor); + } +} Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor) { @@ -121,6 +136,11 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors) _output_tensors = output_tensors; } +void RuntimeGraph::configureAllocations(Tensor *tensor) +{ + _memory_manager->allocate_memory(*tensor); +} + void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel) { assert(kernel != nullptr); @@ -140,7 +160,8 @@ void RuntimeGraph::execute() const { for (const Tensor *input_tensor : getInputTensors()) { - event_notifier->postTensorWrite(input_tensor); + if (input_tensor->is_observable()) + event_notifier->postTensorWrite(input_tensor); } } @@ -155,11 +176,10 @@ void RuntimeGraph::execute() const // TODO The `configure` method should only be called if the outputs of an operator need to be // resized. 
kernel->configure(); -// TODO decide where to allocate memory, and uncomment/remove this if -#if 0 - _tensor_alloc_plan->allocate( - index); // Preallocate outputs in advance instead of relying on automatic allocation -#endif + + // Preallocate outputs in advance instead of relying on automatic allocation + _tensor_alloc_plan->allocate(index); + kernel->execute(); if (event_notifier != nullptr) @@ -169,7 +189,7 @@ void RuntimeGraph::execute() const for (const Tensor *tensor : kernel->getOutputTensors()) { - if (event_notifier != nullptr) + if (event_notifier != nullptr && tensor->is_observable()) { event_notifier->postTensorWrite(tensor); } diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-interpreter/src/core/RuntimeGraph.h index 5f732025d..8184e249d 100644 --- a/compiler/luci-interpreter/src/core/RuntimeGraph.h +++ b/compiler/luci-interpreter/src/core/RuntimeGraph.h @@ -18,6 +18,7 @@ #define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H #include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" #include "core/Kernel.h" #include <memory> @@ -35,7 +36,7 @@ private: friend class TensorAllocPlan; public: - explicit RuntimeGraph(RuntimeModule *owning_module); + explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager); ~RuntimeGraph(); Tensor *addTensor(std::unique_ptr<Tensor> &&tensor); @@ -43,6 +44,8 @@ public: void setInputTensors(const std::vector<Tensor *> &input_tensors); void setOutputTensors(const std::vector<Tensor *> &output_tensors); + void configureAllocations(Tensor *tensor); + const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; } const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; } @@ -51,6 +54,7 @@ public: void execute() const; private: + IMemoryManager *_memory_manager; RuntimeModule *_owning_module; std::vector<std::unique_ptr<Tensor>> _tensors; std::vector<Tensor *> _input_tensors; diff --git 
a/compiler/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-interpreter/src/core/RuntimeModule.h index dccc3a173..78873b0ec 100644 --- a/compiler/luci-interpreter/src/core/RuntimeModule.h +++ b/compiler/luci-interpreter/src/core/RuntimeModule.h @@ -19,6 +19,7 @@ #include "core/RuntimeGraph.h" #include "core/EventNotifier.h" +#include "luci_interpreter/MemoryManager.h" #include <memory> #include <vector> @@ -33,9 +34,9 @@ public: EventNotifier *getEventNotifier() const { return _event_notifier; } - RuntimeGraph *addGraph() + RuntimeGraph *addGraph(IMemoryManager *memory_manager) { - _graphs.push_back(std::make_unique<RuntimeGraph>(this)); + _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager)); return _graphs.back().get(); } diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp index a9e7be0a9..3c3c5ffff 100644 --- a/compiler/luci-interpreter/src/core/Tensor.cpp +++ b/compiler/luci-interpreter/src/core/Tensor.cpp @@ -29,21 +29,6 @@ Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantizati { } -void Tensor::allocate() -{ - deallocate(); - const size_t element_size = getDataTypeSize(_element_type); - const int32_t num_elements = _shape.num_elements(); - _data = std::make_unique<uint8_t[]>(num_elements * element_size); - _data_allocated = true; -} - -void Tensor::deallocate() -{ - _data_allocated = false; - _data.reset(); -} - void Tensor::readData(void *data_ptr, size_t data_size) const { const size_t element_size = getDataTypeSize(element_type()); @@ -68,10 +53,6 @@ void Tensor::writeData(const void *data_ptr, size_t data_size) std::memcpy(data<void>(), data_ptr, data_size); } -void Tensor::resize(const Shape &new_shape) -{ - deallocate(); - _shape = new_shape; -} +void Tensor::resize(const Shape &new_shape) { _shape = new_shape; } } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp 
b/compiler/luci-interpreter/src/kernels/Add.test.cpp index 5ad9beb30..847b65667 100644 --- a/compiler/luci-interpreter/src/kernels/Add.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Add.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,6 +28,14 @@ namespace using namespace testing; +class AddTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + // for quantized Add, the error shouldn't exceed step float GetTolerance(float min, float max) { @@ -34,7 +43,7 @@ float GetTolerance(float min, float max) return kQuantizedStep; } -TEST(AddTest, Uint8) +TEST_F(AddTest, Uint8) { std::initializer_list<int32_t> base_shape = {2, 3, 1, 2}; std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, @@ -57,10 +66,10 @@ TEST(AddTest, Uint8) std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); for (int i = 0; i < output_data.size(); i++) { - Tensor input1_tensor = - makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); - Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, - quant_param.second, test_data); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -69,6 +78,7 @@ TEST(AddTest, Uint8) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -78,10 +88,10 @@ TEST(AddTest, Uint8) // Re-run with exchanged inputs. for (int i = 0; i < output_data.size(); i++) { - Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, - quant_param.second, test_data); - Tensor input2_tensor = - makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -90,6 +100,7 @@ TEST(AddTest, Uint8) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -98,7 +109,7 @@ TEST(AddTest, Uint8) } } -TEST(AddTest, Float) +TEST_F(AddTest, Float) { Shape base_shape = {2, 3, 1, 2}; std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; @@ -116,8 +127,10 @@ TEST(AddTest, Float) std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); AddParams params{}; @@ -125,6 +138,7 @@ TEST(AddTest, Float) Add kernel(&input1_tensor, &input2_tensor, 
&output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) @@ -133,8 +147,10 @@ TEST(AddTest, Float) // Re-run with exchanged inputs. for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); AddParams params{}; @@ -142,6 +158,7 @@ TEST(AddTest, Float) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) @@ -149,7 +166,7 @@ TEST(AddTest, Float) } } -TEST(AddTest, SInt16) +TEST_F(AddTest, SInt16) { Shape base_shape = {2, 3, 1, 2}; std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; @@ -171,9 +188,10 @@ TEST(AddTest, SInt16) for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data); - Tensor input2_tensor = - makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data); + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); const float tolerance = output_tensor.scale(); @@ -182,6 
+200,7 @@ TEST(AddTest, SInt16) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), @@ -193,9 +212,10 @@ TEST(AddTest, SInt16) // Re-run with exchanged inputs and different scales. for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = - makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data); - Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data); + Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0); const float tolerance = output_tensor.scale(); @@ -204,6 +224,7 @@ TEST(AddTest, SInt16) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), @@ -214,10 +235,10 @@ TEST(AddTest, SInt16) } } -TEST(AddTest, Input_Output_Type_NEG) +TEST_F(AddTest, Input_Output_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}); + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); AddParams params{}; @@ -227,10 +248,10 @@ TEST(AddTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(AddTest, Invalid_Input_Type_NEG) +TEST_F(AddTest, Invalid_Input_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); - Tensor 
input2_tensor = makeInputTensor<DataType::S64>({1}, {2}); + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); AddParams params{}; @@ -238,6 +259,7 @@ TEST(AddTest, Invalid_Input_Type_NEG) Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.cpp index 2437d5762..6561a1783 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.cpp +++ b/compiler/luci-interpreter/src/kernels/ArgMax.cpp @@ -16,7 +16,7 @@ #include "kernels/ArgMax.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALArgMax.h" namespace luci_interpreter { @@ -60,10 +60,10 @@ void ArgMax::configure() void ArgMax::execute() const { -#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ - tflite::optimized_ops::ArgMinMax( \ - getTensorShape(input()), getTensorData<data_type>(input()), getTensorData<axis_type>(axis()), \ - getTensorShape(output()), getTensorData<output_type>(output()), std::greater<data_type>()) +#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \ + luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \ + getTensorData<axis_type>(axis()), getTensorShape(output()), \ + getTensorData<output_type>(output()), std::greater<data_type>()) if (axis()->element_type() == DataType::S32) { switch (_params.output_type) diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp index 3362edbf6..119c69ccf 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp +++ 
b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp @@ -16,6 +16,7 @@ #include "kernels/ArgMax.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,15 +33,19 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data, std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T1>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); - Tensor dimension_tensor = makeInputTensor<DataType::S32>(dimension_shape, dimension_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor dimension_tensor = + makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<T2>()); ArgMaxParams params{}; params.output_type = getElementType<T2>(); ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -94,17 +99,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions) TEST(ArgMaxTest, UnsupportedType_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, { - 1, 2, 7, 8, // - 1, 9, 7, 3, // - }); - Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + memory_manager.get()); + Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get()); Tensor 
output_tensor = makeOutputTensor(DataType::U8); ArgMaxParams params{}; params.output_type = DataType::U8; ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp index 65ea4c09e..5545fb4d4 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp @@ -70,6 +70,11 @@ void AveragePool2D::configure() LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); } + else if (input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } output()->resize({batches, output_height, output_width, depth}); } @@ -86,6 +91,9 @@ void AveragePool2D::execute() const case DataType::S16: evalSInt16(); break; + case DataType::S8: + evalSInt8(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -132,6 +140,26 @@ void AveragePool2D::evalQuantized() const getTensorData<uint8_t>(output())); } +void AveragePool2D::evalSInt8() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + 
tflite::reference_integer_ops::AveragePool( + params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()), + getTensorData<int8_t>(output())); +} + void AveragePool2D::evalSInt16() const { int32_t activation_min{}; diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h index 282a58797..b98367f31 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h @@ -40,6 +40,7 @@ private: void evalFloat() const; void evalQuantized() const; void evalSInt16() const; + void evalSInt8() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp index 4d7dab86a..7ed421129 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/AveragePool2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,7 +27,15 @@ namespace using namespace testing; -TEST(AveragePool2DTest, Float) +class AveragePool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(AveragePool2DTest, Float) { Shape input_shape{1, 3, 5, 1}; std::vector<float> input_data{ @@ -34,7 +43,8 @@ TEST(AveragePool2DTest, Float) 1, 2, 3, 4, 5, // 6, 7, 8, 9, 10, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -47,6 +57,7 @@ TEST(AveragePool2DTest, Float) AveragePool2D kernel(&input_tensor, 
&output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -57,15 +68,15 @@ TEST(AveragePool2DTest, Float) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); } -TEST(AveragePool2DTest, Uint8_0) +TEST_F(AveragePool2DTest, Uint8_0) { std::vector<float> input_data{ 0, -6, 12, 4, // -3, -2, 10, 7, // }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); Pool2DParams params{}; @@ -78,13 +89,14 @@ TEST(AveragePool2DTest, Uint8_0) AveragePool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } -TEST(AveragePool2DTest, Uint8_1) +TEST_F(AveragePool2DTest, Uint8_1) { std::vector<float> input_data{ 0, 6, 12, 4, // @@ -92,8 +104,8 @@ TEST(AveragePool2DTest, Uint8_1) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); Pool2DParams params{}; @@ -106,13 +118,14 @@ TEST(AveragePool2DTest, Uint8_1) 
AveragePool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } -TEST(AveragePool2DTest, SInt16) +TEST_F(AveragePool2DTest, SInt16) { Shape input_shape{1, 3, 5, 1}; std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; @@ -125,7 +138,8 @@ TEST(AveragePool2DTest, SInt16) 0, 1.5, // 4.5, 6, // }; - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); Pool2DParams params{}; @@ -138,13 +152,47 @@ TEST(AveragePool2DTest, SInt16) AveragePool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) +TEST_F(AveragePool2DTest, SInt8) +{ + Shape input_shape{1, 4, 5, 1}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5, + 7, -6, -1, 9, -2, 0, -5, 11, -1, -7}; + std::vector<float> ref_output_data{ + 0, 2.5, // + 1, 1.5, // + }; + + std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f); + Tensor input_tensor = makeInputTensor<DataType::S8>( + input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; 
+ params.filter_width = 3; + params.stride_height = 2; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG) { Shape input_shape{1, 3, 5}; std::vector<float> input_data{ @@ -152,7 +200,8 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) 1, 2, 3, 4, 5, // 6, 7, 8, 9, 10, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -167,7 +216,7 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(AveragePool2DTest, In_Out_Type_NEG) +TEST_F(AveragePool2DTest, In_Out_Type_NEG) { Shape input_shape{1, 3, 5, 1}; std::vector<float> input_data{ @@ -175,7 +224,8 @@ TEST(AveragePool2DTest, In_Out_Type_NEG) 1, 2, 3, 4, 5, // 6, 7, 8, 9, 10, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Pool2DParams params{}; @@ -190,7 +240,7 @@ TEST(AveragePool2DTest, In_Out_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(AveragePool2DTest, Quant_Param_NEG) +TEST_F(AveragePool2DTest, Quant_Param_NEG) { std::vector<float> input_data{ 0, -6, 12, 4, // @@ -199,8 +249,8 @@ TEST(AveragePool2DTest, Quant_Param_NEG) std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f); 
std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f); - Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param1.first, - quant_param1.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second); Pool2DParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp index 591fcc00a..bd315ff7b 100644 --- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp +++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp @@ -18,7 +18,7 @@ #include "kernels/BatchToSpaceND.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALBatchToSpaceND.h" #include <stdexcept> @@ -83,13 +83,13 @@ void BatchToSpaceND::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::BatchToSpaceND( + luci_interpreter_pal::BatchToSpaceND( getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), getTensorShape(crops()), getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::BatchToSpaceND( + luci_interpreter_pal::BatchToSpaceND( getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), getTensorShape(crops()), getTensorData<int32_t>(crops()), getTensorShape(output()), diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp index a29981d17..f3a344974 100644 --- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp +++ 
b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp @@ -16,6 +16,7 @@ #include "kernels/BatchToSpaceND.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -33,14 +34,19 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data, std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); - Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data); - Tensor crops_tensor = makeInputTensor<DataType::S32>(crops_shape, crops_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor crops_tensor = + makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -65,10 +71,11 @@ TYPED_TEST(BatchToSpaceNDTest, Simple) TEST(BatchToSpaceNDTest, Invalid_Shape_NEG) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}); - Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + 
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get()); + Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); @@ -77,10 +84,11 @@ TEST(BatchToSpaceNDTest, Invalid_Shape_NEG) TEST(BatchToSpaceNDTest, Invalid_Crops_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( - {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); - Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}); - Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}); + {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get()); + Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt index 9801e11af..1b7d0f66a 100644 --- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt +++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt @@ -1,158 +1,27 @@ -find_package(Threads REQUIRED) - set(SOURCES - Add.h - Add.cpp - ArgMax.h - ArgMax.cpp - AveragePool2D.h - AveragePool2D.cpp - BatchToSpaceND.h - BatchToSpaceND.cpp - Cast.h - Cast.cpp - Concatenation.h - Concatenation.cpp - Conv2D.h - Conv2D.cpp - DepthToSpace.h - DepthToSpace.cpp - 
DepthwiseConv2D.h - DepthwiseConv2D.cpp - Div.h - Div.cpp - Elu.h - Elu.cpp - Exp.h - Exp.cpp - Floor.h - Floor.cpp - FloorDiv.h - FloorDiv.cpp - Equal.h - Equal.cpp - FullyConnected.h - FullyConnected.cpp - Greater.h - Greater.cpp - GreaterEqual.h - GreaterEqual.cpp - If.h - If.cpp - InstanceNorm.h - InstanceNorm.cpp - L2Normalize.h - L2Normalize.cpp - L2Pool2D.h - L2Pool2D.cpp - LeakyRelu.h - LeakyRelu.cpp - Less.h - Less.cpp - LessEqual.h - LessEqual.cpp - LocalResponseNormalization.h - LocalResponseNormalization.cpp - LogicalAnd.h - LogicalAnd.cpp - LogicalNot.h - LogicalNot.cpp - LogicalOr.h - LogicalOr.cpp - Logistic.h - Logistic.cpp - LogSoftmax.h - LogSoftmax.cpp - Maximum.h - Maximum.cpp - MaxPool2D.h - MaxPool2D.cpp - Mean.h - Mean.cpp - Minimum.h - Minimum.cpp - MirrorPad.h - MirrorPad.cpp - Mul.h - Mul.cpp - Neg.h - Neg.cpp - NotEqual.h - NotEqual.cpp - Pack.h - Pack.cpp - Pad.h - Pad.cpp - PadV2.h - PadV2.cpp - Pow.h - Pow.cpp - PRelu.h - PRelu.cpp - Relu.h - Relu.cpp - Relu6.h - Relu6.cpp - Reshape.h - Reshape.cpp - ResizeBilinear.h - ResizeBilinear.cpp - ResizeNearestNeighbor.h - ResizeNearestNeighbor.cpp - ReverseV2.h - ReverseV2.cpp - Rsqrt.h - Rsqrt.cpp - Slice.h - Slice.cpp - Softmax.h - Softmax.cpp - SpaceToBatchND.h - SpaceToBatchND.cpp - SpaceToDepth.h - SpaceToDepth.cpp - Split.h - Split.cpp - StridedSlice.h - StridedSlice.cpp - Sqrt.h - Sqrt.cpp - Square.h - Square.cpp - SquaredDifference.h - SquaredDifference.cpp - Squeeze.h - Squeeze.cpp - Sub.h - Sub.cpp - Tanh.h - Tanh.cpp - Transpose.h - Transpose.cpp - TransposeConv.h - TransposeConv.cpp - Unpack.h - Unpack.cpp - While.h - While.cpp) + BinaryOpCommon.h + Utils.h + Utils.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" + ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp + "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" + ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp) + +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES 
"${NODE}.h") + list(APPEND SOURCES "${NODE}.cpp") +endmacro(REGISTER_KERNEL) + +include(${KERNEL_REGISTER_FILE}) -list(APPEND SOURCES - BinaryOpCommon.h - Utils.h - Utils.cpp - ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) +add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES}) +set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) -add_library(luci_interpreter_kernels STATIC ${SOURCES}) -set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR}) -target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE - "${TensorFlowRuySource_DIR}" - "${TensorFlowGEMMLowpSource_DIR}" - "${TensorFlowEigenSource_DIR}" - "${TensorFlowSource_DIR}") -target_link_libraries(luci_interpreter_kernels - PUBLIC luci_interpreter_core - PRIVATE nncc_common Threads::Threads) +target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE}) +target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common) + +add_pal_to_target(${LUCI_INTERPRETER_KERNELS}) if(NOT ENABLE_TEST) return() @@ -160,75 +29,13 @@ endif(NOT ENABLE_TEST) nnas_find_package(GTest REQUIRED) -set(TEST_SOURCES - Add.test.cpp - ArgMax.test.cpp - AveragePool2D.test.cpp - BatchToSpaceND.test.cpp - Cast.test.cpp - Concatenation.test.cpp - Conv2D.test.cpp - DepthToSpace.test.cpp - DepthwiseConv2D.test.cpp - Div.test.cpp - Elu.test.cpp - Exp.test.cpp - Floor.test.cpp - FloorDiv.test.cpp - Equal.test.cpp - FullyConnected.test.cpp - Greater.test.cpp - GreaterEqual.test.cpp - If.test.cpp - InstanceNorm.test.cpp - L2Normalize.test.cpp - L2Pool2D.test.cpp - LeakyRelu.test.cpp - Less.test.cpp - LessEqual.test.cpp - LocalResponseNormalization.test.cpp - LogicalAnd.test.cpp - LogicalNot.test.cpp - LogicalOr.test.cpp - 
Logistic.test.cpp - LogSoftmax.test.cpp - Maximum.test.cpp - MaxPool2D.test.cpp - Mean.test.cpp - Minimum.test.cpp - Mul.test.cpp - Neg.test.cpp - NotEqual.test.cpp - Pack.test.cpp - Pad.test.cpp - PadV2.test.cpp - Pow.test.cpp - PRelu.test.cpp - Relu.test.cpp - Relu6.test.cpp - Reshape.test.cpp - ResizeBilinear.test.cpp - ResizeNearestNeighbor.test.cpp - ReverseV2.test.cpp - Rsqrt.test.cpp - Slice.test.cpp - Softmax.test.cpp - SpaceToBatchND.test.cpp - SpaceToDepth.test.cpp - Split.test.cpp - StridedSlice.test.cpp - Sqrt.test.cpp - Square.test.cpp - SquaredDifference.test.cpp - Squeeze.test.cpp - Sub.test.cpp - Tanh.test.cpp - Transpose.test.cpp - TransposeConv.test.cpp - Unpack.test.cpp - While.test.cpp) +macro(REGISTER_KERNEL NODE) + list(APPEND TEST_SOURCES "${NODE}.test.cpp") +endmacro(REGISTER_KERNEL) + +include(${KERNEL_REGISTER_FILE}) list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp) -GTest_AddTest(luci_interpreter_kernels_test ${TEST_SOURCES}) -target_link_libraries(luci_interpreter_kernels_test luci_interpreter_kernels) +GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS}) diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp index 42944628d..731260522 100644 --- a/compiler/luci-interpreter/src/kernels/Cast.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Cast.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Cast.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,59 +31,209 @@ template <typename T1, typename T2> void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data, std::initializer_list<T2> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType input_type = getElementType<T1>(); constexpr DataType output_type = 
getElementType<T2>(); - Tensor input_tensor = makeInputTensor<input_type>(shape, input_data); + Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(output_type); Cast kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), shape); } +template <typename T> +void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data, + std::initializer_list<T> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType input_type = loco::DataType::BOOL; + constexpr DataType output_type = getElementType<T>(); + std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted; + for (auto elem : input_data) + { + input_data_converted.push_back(elem); + } + + Tensor input_tensor = + makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(output_type); + + Cast kernel(&input_tensor, &output_tensor); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), shape); +} + template <typename T> class CastTest : public ::testing::Test { }; -using DataTypes = ::testing::Types<uint8_t, int32_t, int64_t>; -TYPED_TEST_CASE(CastTest, DataTypes); +using IntDataTypes = + ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>; +TYPED_TEST_CASE(CastTest, IntDataTypes); TYPED_TEST(CastTest, FloatToInt) { Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4}, /*input_data=*/ { - 1.43f, 9.99f, 7.0f, 3.12f, // + 1.0f, 9.0f, 7.0f, 3.0f, // }, 
/*output_data=*/ { 1, 9, 7, 3, // }); - Check<TypeParam, TypeParam>(/*shape=*/{1, 1, 1, 4}, - /*input_data=*/ - { - 1, 9, 7, 3, // - }, - /*output_data=*/ - { - 1, 9, 7, 3, // - }); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToFloat) +{ + Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*output_data=*/ + { + 1.0f, 9.0f, 7.0f, 3.0f, // + }); + SUCCEED(); +} + +template <typename T1, typename T2> void check_int() +{ + Check<T1, T2>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 9, 7, 3, // + }, + /*output_data=*/ + { + 1, 9, 7, 3, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToInt) +{ + check_int<TypeParam, uint8_t>(); + check_int<TypeParam, uint16_t>(); + check_int<TypeParam, uint32_t>(); + check_int<TypeParam, uint64_t>(); + check_int<TypeParam, int8_t>(); + check_int<TypeParam, int16_t>(); + check_int<TypeParam, int32_t>(); + check_int<TypeParam, int64_t>(); + SUCCEED(); +} + +TYPED_TEST(CastTest, IntToBool) +{ + Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1, 0, 7, 0, // + }, + /*output_data=*/ + { + true, false, true, false, // + }); + SUCCEED(); +} + +TYPED_TEST(CastTest, BoolToInt) +{ + CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, false, false, true, // + }, + /*output_data=*/ + { + 1, 0, 0, 1, // + }); + SUCCEED(); +} + +TEST(CastTest, FloatToBool) +{ + Check<float, bool>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }, + /*output_data=*/ + { + true, false, true, false, // + }); + SUCCEED(); +} + +TEST(CastTest, BoolToFloat) +{ + CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, false, false, true, // + }, + /*output_data=*/ + { + 1.0f, 0.0f, 0.0f, 1.0f, // + }); + SUCCEED(); +} + +TEST(CastTest, FloatToFloat) +{ + Check<float, float>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }, + /*output_data=*/ + { + 1.0f, 0.0f, 7.0f, 0.0f, // + }); + SUCCEED(); +} + 
+TEST(CastTest, BoolToBool) +{ + CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4}, + /*input_data=*/ + { + true, true, false, false, // + }, + /*output_data=*/ + { + true, true, false, false, // + }); + SUCCEED(); } TEST(CastTest, UnsupportedType_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, { - 1, 2, 7, 8, // - 1, 9, 7, 3, // - }); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, + { + 1, 2, 7, 8, // + 1, 9, 7, 3, // + }, + memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::Unknown); Cast kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); + SUCCEED(); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp index e3376c13d..7cfdf34b9 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp @@ -18,7 +18,7 @@ #include "kernels/Concatenation.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/concatenation.h> #include <stdexcept> diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp index ee9b7d0d3..e4b50611a 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Concatenation.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,12 +27,22 @@ namespace using namespace testing; -TEST(ConcatenationTest, Float) +class ConcatenationTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + 
std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ConcatenationTest, Float) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; @@ -42,6 +53,10 @@ TEST(ConcatenationTest, Float) Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + for (auto t : kernel.getOutputTensors()) + { + _memory_manager->allocate_memory(*t); + } kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -53,6 +68,7 @@ TEST(ConcatenationTest, Float) Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -64,6 +80,7 @@ TEST(ConcatenationTest, Float) Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -75,6 +92,7 @@ TEST(ConcatenationTest, Float) Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -82,7 +100,7 @@ TEST(ConcatenationTest, Float) } } -TEST(ConcatenationTest, Input_Number_Check_NEG) +TEST_F(ConcatenationTest, Input_Number_Check_NEG) { Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; @@ -94,12 +112,14 @@ TEST(ConcatenationTest, Input_Number_Check_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(ConcatenationTest, Invalid_Axis_NEG) +TEST_F(ConcatenationTest, Invalid_Axis_NEG) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; @@ -110,12 +130,13 @@ TEST(ConcatenationTest, Invalid_Axis_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(ConcatenationTest, Mismatching_Input_Type_NEG) +TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; @@ -126,12 +147,14 @@ TEST(ConcatenationTest, Mismatching_Input_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = 
makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; @@ -142,12 +165,14 @@ TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG) +TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; @@ -158,12 +183,12 @@ TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(ConcatenationTest, Unsupported_Configure_Type_NEG) +TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG) { std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6}; std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data); + Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = 
makeOutputTensor(DataType::S8); ConcatenationParams params{}; @@ -175,12 +200,14 @@ TEST(ConcatenationTest, Unsupported_Configure_Type_NEG) } // TODO: Remove this test when concat w/ fused_activation is supported -TEST(ConcatenationTest, With_Fused_Activation_NEG) +TEST_F(ConcatenationTest, With_Fused_Activation_NEG) { std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ConcatenationParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 56ca96a34..fb5e063a9 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> +#include "PALConv2d.h" #include <stdexcept> #include <thread> @@ -30,8 +30,8 @@ namespace kernels { Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - const Conv2DParams &params) - : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params) + Tensor *im2col, const Conv2DParams &params) + : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params) { } @@ -45,7 +45,7 @@ void Conv2D::configure() // (3) | uint8 uint8 int32 uint8 | quantized // (4) | int8 int8 int32 int8 | quantized per channel // - // We only support (1) and (3) for now, and additionally the following: + // We only support (1), (3) and (4) for now, and additionally the following: // 
| input filter bias output | // ----+---------------------------+ // (5) | int16 int16 int64 int16 | @@ -58,6 +58,17 @@ void Conv2D::configure() { LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); } + else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast<size_t>(filter()->shape().dim(0))); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + } else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) { LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); @@ -103,23 +114,20 @@ void Conv2D::configure() _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1; const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 || filter_height != 1 || filter_width != 1; - const bool need_im2col = + _need_im2col = input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col); - if (need_im2col) + if (_need_im2col) { const int input_depth = input_shape.dim(3); Shape im2col_shape{batches, output_height, output_width, input_depth * filter_height * filter_width}; - try - { - _im2col = - std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, ""); - } - catch (std::bad_alloc &ba) - { - // Failed memory allocation - _im2col = nullptr; - } + auto im2col = getOutputTensors()[1]; + im2col->resize(im2col_shape); + } + else + { + auto im2col = getOutputTensors()[1]; + im2col->set_allocatable(false); } } @@ -147,14 +155,15 @@ void Conv2D::execute() const evalQuantizedPerChannel(); } break; + case DataType::S8: + evalQuantizedS8PerChannel(); + break; case DataType::S16: 
evalQuantizedS16(); break; default: throw std::runtime_error("Unsupported type."); } - if (!!_im2col) - _im2col->deallocate(); } void Conv2D::evalFloat() const @@ -173,32 +182,16 @@ void Conv2D::evalFloat() const params.float_activation_min = activation_min; params.float_activation_max = activation_max; - if (_im2col) + float *im2col_data = nullptr; + auto im2col = getOutputTensors()[1]; + if (_need_im2col) { - try - { - tflite::optimized_ops::Conv( - params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), getTensorShape(_im2col.get()), - getTensorData<float>(_im2col.get())); - } - catch (std::bad_alloc &ba) - { - // Failed memory allocation - _im2col->deallocate(); - - tflite::reference_ops::Conv( - params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr); - } + im2col_data = im2col->data<float>(); } - else - tflite::reference_ops::Conv( - params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr); + luci_interpreter_pal::Conv( + params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), + getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), + getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data); } void Conv2D::evalQuantized() const @@ -232,16 +225,12 @@ void Conv2D::evalQuantized() const params.quantized_activation_min = activation_min; 
params.quantized_activation_max = activation_max; - // TODO This should only be done once (although it takes only a few microseconds). - // Also, the user should be able to adjust the number of threads. - auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>(); - gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency())); - - tflite::optimized_ops::Conv( - params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()), - getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), - getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()), - getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get()); + auto im2col = getOutputTensors()[1]; + luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(filter()), getTensorData<uint8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<uint8_t>(output()), + getTensorShape(im2col), getTensorData<uint8_t>(im2col)); } void Conv2D::evalQuantizedPerChannel() const @@ -330,6 +319,54 @@ void Conv2D::evalQuantizedPerChannel() const } } +void Conv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. 
+ params.weights_offset = 0; // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector<int32_t> shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector<int32_t> multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + int8_t *im2col_data = nullptr; + auto im2col = getOutputTensors()[1]; + if (_need_im2col) + { + im2col_data = im2col->data<int8_t>(); + } + + luci_interpreter_pal::ConvPerChannel( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), + getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data); +} + void Conv2D::evalQuantizedS16() const { const auto *input_data = getTensorData<int16_t>(input()); diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 86f73c251..5f1317638 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams> { public: Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output, - const Conv2DParams &params); + Tensor *im2col, const Conv2DParams &params); const Tensor *input() const { return _inputs[0]; } 
const Tensor *filter() const { return _inputs[1]; } @@ -45,10 +45,11 @@ private: void evalFloat() const; void evalQuantized() const; void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; void evalQuantizedS16() const; private: - std::unique_ptr<Tensor> _im2col; + bool _need_im2col = false; int32_t _padding_height{}; int32_t _padding_width{}; }; diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index 8610a4fe6..277c280f5 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Conv2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,7 +27,15 @@ namespace using namespace testing; -TEST(Conv2DTest, Float) +class Conv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(Conv2DTest, Float) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; @@ -44,9 +53,13 @@ TEST(Conv2DTest, Float) -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); 
Conv2DParams params{}; @@ -57,8 +70,10 @@ TEST(Conv2DTest, Float) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -70,7 +85,55 @@ TEST(Conv2DTest, Float) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, FloatCheck) +TEST_F(Conv2DTest, FloatPointwise) +{ + Shape input_shape{1, 2, 2, 2}; + Shape filter_shape{2, 1, 1, 2}; + Shape bias_shape{2}; + std::vector<float> input_data{ + 1, 2, // row = 0, col = 0 + 3, 4, // row = 0, col = 1 + 5, 6, // row = 1, col = 0 + 7, 8, // row = 1, col = 1 + }; + std::vector<float> filter_data{ + -1, 2, // out = 0 + -3, 4, // out = 1 + }; + std::vector<float> bias_data{1, 2}; + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + kernel.configure(); + _memory_manager->allocate_memory(im2col); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + std::vector<float> 
ref_output_data{ + 4, 7, 6, 9, // row = 0 + 8, 11, 10, 13, // row = 1 + }; + std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, FloatCheck) { Shape input_shape{2, 2, 4, 1}; Shape filter_shape{3, 2, 2, 1}; @@ -89,9 +152,13 @@ TEST(Conv2DTest, FloatCheck) -1, -1, 1, 1, // third 2x2 filter }; std::vector<float> bias_data{1, 2, 3}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -102,8 +169,10 @@ TEST(Conv2DTest, FloatCheck) params.dilation_width_factor = 1; params.activation = Activation::NONE; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); std::vector<float> ref_output_data{ @@ -117,7 +186,7 @@ TEST(Conv2DTest, FloatCheck) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, Uint8) +TEST_F(Conv2DTest, Uint8) { std::vector<float> input_data{ // First batch @@ -137,12 
+206,15 @@ TEST(Conv2DTest, Uint8) std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, - input_quant_param.second, input_data); - Tensor filter_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, - input_quant_param.second, filter_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); Tensor bias_tensor = makeInputTensor<DataType::S32>( - {3}, input_quant_param.first * input_quant_param.first, 0, bias_data); + {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); @@ -154,8 +226,10 @@ TEST(Conv2DTest, Uint8) params.dilation_width_factor = 1; params.activation = Activation::NONE; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); std::vector<float> ref_output_data{ @@ -169,7 +243,7 @@ TEST(Conv2DTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, Uint8_CWQ) +TEST_F(Conv2DTest, Uint8_CWQ) { const int output_channels = 3; std::vector<float> input_data{ @@ -209,12 +283,14 @@ TEST(Conv2DTest, Uint8_CWQ) 
bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); std::vector<int32_t> zerop(output_channels, 0); - Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, - input_quant_param.second, input_data); - Tensor filter_tensor = - makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data); - Tensor bias_tensor = - makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::U8, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); @@ -226,8 +302,10 @@ TEST(Conv2DTest, Uint8_CWQ) params.dilation_width_factor = 1; params.activation = Activation::NONE; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); std::vector<float> ref_output_data{ @@ -241,7 +319,83 @@ TEST(Conv2DTest, Uint8_CWQ) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(Conv2DTest, SInt16) +TEST_F(Conv2DTest, SInt8_CWQ) +{ + const int output_channels = 3; + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 
1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + Shape filter_shape{output_channels, 2, 2, 1}; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops, + 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, _memory_manager.get()); + Tensor im2col(DataType::S8, Shape({}), {}, ""); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + Conv2DParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); + 
kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); + kernel.execute(); + + std::vector<float> ref_output_data{ + 18, 2, 5, // first batch, left + 18, 2, 5, // first batch, right + 17, 4, 3, // second batch, left + 37, 4, 3, // second batch, right + }; + std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST_F(Conv2DTest, SInt16) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; @@ -266,9 +420,13 @@ TEST(Conv2DTest, SInt16) 0, 40, 0, 44, // row = 1 }; - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data); - Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); Conv2DParams params{}; @@ -279,15 +437,17 @@ TEST(Conv2DTest, SInt16) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray(ref_output_shape)); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(Conv2DTest, SInt16_CWQ_weights) +TEST_F(Conv2DTest, SInt16_CWQ_weights) { Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels @@ -321,10 +481,13 @@ TEST(Conv2DTest, SInt16_CWQ_weights) bias_scales.push_back(filter_scales[i] * input_scale); std::vector<int32_t> zerop = {0, 0, 0}; - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data); - Tensor filter_tensor = - makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor im2col(DataType::S16, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); Conv2DParams params{}; @@ -335,15 +498,17 @@ TEST(Conv2DTest, SInt16_CWQ_weights) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + _memory_manager->allocate_memory(im2col); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(Conv2DTest, 
Unsupported_Type_Configure_NEG) +TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; @@ -361,9 +526,13 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG) -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -374,11 +543,11 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Conv2DTest, Invalid_Bias_Type_NEG) +TEST_F(Conv2DTest, Invalid_Bias_Type_NEG) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; @@ -396,9 +565,12 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG) -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<uint8_t> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, 
_memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -409,11 +581,11 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Conv2DTest, Invalid_Bias_Data_NEG) +TEST_F(Conv2DTest, Invalid_Bias_Data_NEG) { Shape input_shape{1, 4, 3, 2}; Shape filter_shape{2, 2, 2, 2}; @@ -431,9 +603,13 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG) -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2, 3}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -444,11 +620,11 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, 
&filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Conv2DTest, Invalid_Input_Shape_NEG) +TEST_F(Conv2DTest, Invalid_Input_Shape_NEG) { Shape input_shape{1, 4, 6, 1}; Shape filter_shape{2, 2, 2, 2}; @@ -466,9 +642,13 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG) -8, -6, 7, 5, // out = 1, row = 1 }; std::vector<float> bias_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); + Tensor im2col(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Conv2DParams params{}; @@ -479,7 +659,7 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG) params.dilation_width_factor = 1; params.activation = Activation::RELU; - Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params); EXPECT_ANY_THROW(kernel.configure()); } diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp index f2b9e4ccc..3a9acd1d4 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp @@ -16,7 +16,7 @@ #include "DepthToSpace.h" #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALDepthToSpace.h" namespace luci_interpreter { @@ -62,14 +62,14 @@ void DepthToSpace::execute() const 
switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()), - getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); break; default: throw std::runtime_error("Unsupported Type."); diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp index 3dee4ad36..9b1c09ba9 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp @@ -16,6 +16,7 @@ #include "kernels/DepthToSpace.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -35,12 +36,14 @@ TYPED_TEST_CASE(DepthToSpaceTest, DataTypes); TYPED_TEST(DepthToSpaceTest, SimpleCase) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8}; Shape input_shape{1, 1, 2, 4}; std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8}; std::vector<int32_t> output_shape{1, 2, 4, 1}; - Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); DepthToSpaceParams params{}; @@ 
-48,6 +51,7 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase) DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), @@ -57,10 +61,12 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase) TEST(DepthToSpaceTest, InvalidInputShape_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; Shape input_shape{1, 2, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthToSpaceParams params{}; @@ -72,10 +78,12 @@ TEST(DepthToSpaceTest, InvalidInputShape_NEG) TEST(DepthToSpaceTest, InOutTypeMismatch_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; Shape input_shape{1, 1, 2, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); DepthToSpaceParams params{}; @@ -87,10 +95,12 @@ TEST(DepthToSpaceTest, InOutTypeMismatch_NEG) TEST(DepthToSpaceTest, InvalidBlockSize_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; Shape input_shape{1, 1, 2, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthToSpaceParams params{}; diff --git 
a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp index 1452f4421..f2dbf6c68 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -20,6 +20,7 @@ #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h> #include <stdexcept> @@ -45,7 +46,7 @@ void DepthwiseConv2D::configure() // (4) | int8 int8 int32 int8 | quantized per channel // (5) | int16 int8 int64 int16 | quantized per channel 16x8 // - // We only support (1) and (3) for now, and additionally the following: + // We only support (1), (3) and (4) for now, and additionally the following: // | input filter bias output | // ----+---------------------------+ // (5) | int16 int16 int64 int16 | @@ -58,6 +59,17 @@ void DepthwiseConv2D::configure() { LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); } + else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) == + filter()->scales().size()); + for (auto zerop : filter()->zero_points()) + { + LUCI_INTERPRETER_CHECK(zerop == 0); + } + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); + } else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16) { LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64); @@ -123,6 +135,9 @@ void DepthwiseConv2D::execute() const evalQuantizedPerChannel(); } break; + case DataType::S8: + evalQuantizedS8PerChannel(); + break; case DataType::S16: evalQuantizedS16(); break; @@ -283,6 +298,52 @@ void 
DepthwiseConv2D::evalQuantized() const getTensorShape(output()), getTensorData<uint8_t>(output())); } +void DepthwiseConv2D::evalQuantizedS8PerChannel() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::DepthwiseParams params{}; + + params.padding_type = tflite::PaddingType::kSame; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + params.depth_multiplier = _params.depth_multiplier; + // The kernel expects input and filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. + params.weights_offset = 0; + params.output_offset = output()->zero_point(); + params.output_multiplier = 1; // unused in tflite code + params.output_shift = 0; // unused in tflite code + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector<double> effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector<ChannelQuantMultipliers> quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector<int32_t> shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector<int32_t> multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + tflite::reference_integer_ops::DepthwiseConvPerChannel( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + 
getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()), + getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()), + getTensorData<int8_t>(output())); +} + void DepthwiseConv2D::evalQuantizedS16() const { const auto *input_data = getTensorData<int16_t>(input()); diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h index 6d700dd0f..6cffd6583 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h @@ -43,6 +43,7 @@ private: void evalFloat() const; void evalQuantized() const; void evalQuantizedPerChannel() const; + void evalQuantizedS8PerChannel() const; void evalQuantizedS16() const; private: diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp index 3e2f434dd..74975899a 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/DepthwiseConv2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,7 +27,15 @@ namespace using namespace testing; -TEST(DepthwiseConv2DTest, Float) +class DepthwiseConv2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(DepthwiseConv2DTest, Float) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{1, 2, 2, 4}; @@ -44,9 +53,12 @@ TEST(DepthwiseConv2DTest, Float) 13, -14, 15, -16, // }; std::vector<float> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = 
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; @@ -60,6 +72,7 @@ TEST(DepthwiseConv2DTest, Float) DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -70,7 +83,7 @@ TEST(DepthwiseConv2DTest, Float) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); } -TEST(DepthwiseConv2DTest, Uint8) +TEST_F(DepthwiseConv2DTest, Uint8) { std::vector<float> input_data{ 1, 2, 7, 8, // column 1 @@ -88,12 +101,14 @@ TEST(DepthwiseConv2DTest, Uint8) std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Tensor input_tensor = makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, - input_quant_param.second, input_data); - Tensor filter_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, - input_quant_param.second, filter_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second, + filter_data, _memory_manager.get()); Tensor bias_tensor = makeInputTensor<DataType::S32>( - {4}, input_quant_param.first * input_quant_param.first, 0, bias_data); + {4}, input_quant_param.first * 
input_quant_param.first, 0, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); @@ -108,6 +123,7 @@ TEST(DepthwiseConv2DTest, Uint8) DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -118,7 +134,7 @@ TEST(DepthwiseConv2DTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); } -TEST(DepthwiseConv2DTest, SInt16) +TEST_F(DepthwiseConv2DTest, SInt16) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{1, 2, 2, 4}; @@ -143,9 +159,12 @@ TEST(DepthwiseConv2DTest, SInt16) 167, 0, 227, 28, // }; - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data); - Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); DepthwiseConv2DParams params{}; @@ -159,13 +178,14 @@ TEST(DepthwiseConv2DTest, SInt16) DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(DepthwiseConv2DTest, SInt16_CWQ_weights) 
+TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights) { const int output_channels = 4; Shape input_shape{1, 4, 2, 2}; @@ -197,10 +217,12 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights) for (int i = 0; i < output_channels; ++i) bias_scales.push_back(filter_scales[i] * input_scale); std::vector<int32_t> zerop(4, 0); - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data); - Tensor filter_tensor = - makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, + filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); DepthwiseConv2DParams params{}; @@ -214,13 +236,14 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights) DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(DepthwiseConv2DTest, Uint8_CWQ_weights) +TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights) { const int output_channels = 4; Shape input_shape{1, 3, 2, 2}; @@ -267,11 +290,13 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights) bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); std::vector<int32_t> zerop(output_channels, 0); - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, - 
input_quant_param.second, input_data); - Tensor filter_tensor = - makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); @@ -286,6 +311,7 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights) DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); @@ -293,7 +319,83 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights) FloatArrayNear(ref_output_data, output_quant_param.first)); } -TEST(DepthwiseConv2DTest, InvalidBiasType_NEG) +TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights) +{ + const int output_channels = 4; + Shape input_shape{1, 3, 2, 2}; + Shape filter_shape{1, 2, 2, output_channels}; + Shape bias_shape{4}; + std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels}; + + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::vector<float> ref_output_data{ + 71, -34, 99, -20, // + 91, -26, 127, -4, // + }; + + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127); + 
std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + + std::vector<std::pair<float, int32_t>> filter_quant_params; + filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(1, 0)); + filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0)); + + std::vector<float> filter_scales; + std::vector<int32_t> filter_zerops; + for (auto iter : filter_quant_params) + { + filter_scales.push_back(iter.first); + filter_zerops.push_back(iter.second); + } + + std::vector<float> bias_scales; + for (int i = 0; i < output_channels; ++i) + bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first); + std::vector<int32_t> zerop(output_channels, 0); + + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops, + 3, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data, + _memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 1; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::NONE; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + _memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + 
FloatArrayNear(ref_output_data, output_quant_param.first)); +} + +TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{1, 2, 2, 4}; @@ -311,9 +413,11 @@ TEST(DepthwiseConv2DTest, InvalidBiasType_NEG) 13, -14, 15, -16, // }; std::vector<int32_t> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; @@ -329,7 +433,7 @@ TEST(DepthwiseConv2DTest, InvalidBiasType_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG) +TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{1, 2, 2, 4}; @@ -347,9 +451,12 @@ TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG) 13, -14, 15, -16, // }; std::vector<float> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor 
= makeOutputTensor(DataType::U8); DepthwiseConv2DParams params{}; @@ -365,7 +472,7 @@ TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(DepthwiseConv2DTest, InvalidInputShape_NEG) +TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG) { Shape input_shape{4, 2, 2}; Shape filter_shape{2, 2, 4}; @@ -383,9 +490,12 @@ TEST(DepthwiseConv2DTest, InvalidInputShape_NEG) 13, -14, 15, -16, // }; std::vector<float> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; @@ -401,7 +511,7 @@ TEST(DepthwiseConv2DTest, InvalidInputShape_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG) +TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{2, 1, 2, 4}; @@ -419,9 +529,12 @@ TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG) 13, -14, 15, -16, // }; std::vector<float> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + 
makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; @@ -437,7 +550,7 @@ TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG) +TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG) { Shape input_shape{1, 4, 2, 2}; Shape filter_shape{1, 2, 4, 2}; @@ -455,9 +568,12 @@ TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG) 13, -14, 15, -16, // }; std::vector<float> bias_data{1, 2, 3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DepthwiseConv2DParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp index db1496d37..0e52ba1f0 100644 --- a/compiler/luci-interpreter/src/kernels/Div.cpp +++ b/compiler/luci-interpreter/src/kernels/Div.cpp @@ -18,7 +18,8 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/div.h> +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp 
b/compiler/luci-interpreter/src/kernels/Div.test.cpp index 1a0c4af15..021d68d06 100644 --- a/compiler/luci-interpreter/src/kernels/Div.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Div.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,6 +28,14 @@ namespace using namespace testing; +class DivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + float GetTolerance(float min, float max) { const float kQuantizedStep = (max - min) / 255.0f; @@ -34,7 +43,7 @@ float GetTolerance(float min, float max) return kQuantizedTolerance; } -TEST(DivTest, Float) +TEST_F(DivTest, Float) { Shape base_shape = {2, 3, 1, 1}; @@ -44,8 +53,10 @@ TEST(DivTest, Float) std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); @@ -54,13 +65,14 @@ TEST(DivTest, Float) Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } -TEST(DivTest, FloatBroadcast) +TEST_F(DivTest, FloatBroadcast) { Shape input1_shape = {1, 3}; Shape 
input2_shape = {3, 1}; @@ -69,8 +81,10 @@ TEST(DivTest, FloatBroadcast) std::vector<float> input2_data{0.2f, 1.6f, 0.5f}; std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); @@ -79,12 +93,13 @@ TEST(DivTest, FloatBroadcast) Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); } -TEST(DivTest, Uint8) +TEST_F(DivTest, Uint8) { Shape base_shape = {1, 2, 2, 1}; @@ -98,10 +113,10 @@ TEST(DivTest, Uint8) std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f); - Tensor input1_tensor = - makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data); - Tensor input2_tensor = - makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -111,6 +126,7 @@ TEST(DivTest, Uint8) Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -118,10 +134,10 @@ TEST(DivTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } -TEST(DivTest, Input_Output_Type_NEG) +TEST_F(DivTest, Input_Output_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}); + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); DivParams params{}; @@ -131,10 +147,10 @@ TEST(DivTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(DivTest, Invalid_Input_Type_NEG) +TEST_F(DivTest, Invalid_Input_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); - Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}); + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); DivParams params{}; @@ -142,6 +158,7 @@ TEST(DivTest, Invalid_Input_Type_NEG) Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-interpreter/src/kernels/Elu.cpp index 456396055..697d63be4 100644 --- a/compiler/luci-interpreter/src/kernels/Elu.cpp +++ b/compiler/luci-interpreter/src/kernels/Elu.cpp @@ -17,7 +17,7 @@ #include "kernels/Elu.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALElu.h" #include <stdexcept> @@ -40,8 +40,8 @@ void 
Elu::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::Elu(getTensorShape(input()), getTensorData<float>(input()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); break; default: throw std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-interpreter/src/kernels/Elu.test.cpp index e26eed03e..814499cdb 100644 --- a/compiler/luci-interpreter/src/kernels/Elu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Elu.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Elu.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,11 +30,14 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Elu kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); (void)output_shape; @@ -58,12 +62,14 @@ TEST(EluTest, SimpleElu) TEST(EluTest, InOutTypeMismatch_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ 0, -6, 2, -4, // 3, -2, 10, -0.1, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Elu kernel(&input_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h index 69b3be774..11f025eac 100644 --- a/compiler/luci-interpreter/src/kernels/Equal.h +++ b/compiler/luci-interpreter/src/kernels/Equal.h @@ -42,9 +42,9 @@ private: private: int32_t _x_multiplier = 0; - int32_t _x_shift = 0; + int _x_shift = 0; int32_t _y_multiplier = 0; - int32_t _y_shift = 0; + int _y_shift = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp index ba2827ba9..46a0f97d8 100644 --- a/compiler/luci-interpreter/src/kernels/Equal.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Equal.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(EqualTest, FloatSimple) +class EqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(EqualTest, FloatSimple) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -44,19 +53,20 @@ TEST(EqualTest, FloatSimple) false, true, false, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Equal kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(EqualTest, FloatBroardcast) +TEST_F(EqualTest, FloatBroardcast) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -76,12 +86,13 @@ TEST(EqualTest, FloatBroardcast) true, true, true, // Row 4 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Equal kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); @@ -92,7 +103,7 @@ TEST(EqualTest, FloatBroardcast) const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; -TEST(EqualTest, Uint8Quantized) +TEST_F(EqualTest, Uint8Quantized) { std::vector<float> x_data{ 0.5, 0.5, 0.7, 0.9, // Row 1 @@ -110,24 +121,25 @@ TEST(EqualTest, Uint8Quantized) }; std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data); + Tensor y_tensor = 
makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Equal kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(EqualTest, Uint8QuantizedBroadcast) +TEST_F(EqualTest, Uint8QuantizedBroadcast) { std::vector<float> x_data{ 0.4, -0.8, 0.7, 0.3, // Row 1 @@ -148,34 +160,35 @@ TEST(EqualTest, Uint8QuantizedBroadcast) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Equal kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(EqualTest, Input_Type_Mismatch_NEG) +TEST_F(EqualTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, 
_memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Equal kernel(&x_tensor, &y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(EqualTest, Input_Output_Type_NEG) +TEST_F(EqualTest, Input_Output_Type_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Equal kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-interpreter/src/kernels/Exp.cpp index f7b115ab3..e7c560a88 100644 --- a/compiler/luci-interpreter/src/kernels/Exp.cpp +++ b/compiler/luci-interpreter/src/kernels/Exp.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/exp.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-interpreter/src/kernels/Exp.test.cpp index 19b2c141a..a159d9db9 100644 --- a/compiler/luci-interpreter/src/kernels/Exp.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Exp.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Exp.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,13 +30,16 @@ using namespace testing; TEST(ExpTest, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); Shape input_shape{1, 1, 7}; std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + 
Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Exp kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<int32_t> ref_output_shape{1, 1, 7}; diff --git a/compiler/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-interpreter/src/kernels/Floor.test.cpp index d90d611d9..30076fb54 100644 --- a/compiler/luci-interpreter/src/kernels/Floor.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Floor.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Floor.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,7 +27,15 @@ namespace using namespace testing; -TEST(FloorTest, SimpleFloat) +class FloorTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorTest, SimpleFloat) { std::initializer_list<int32_t> input_shape{1, 2, 4, 1}; std::vector<float> input_data{ @@ -40,20 +49,22 @@ TEST(FloorTest, SimpleFloat) 3, 7, 10, -1, // Row 2 }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Floor kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(FloorTest, Input_Output_Type_NEG) +TEST_F(FloorTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor 
input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); Floor kernel(&input_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp index 16831ca80..3e1b5f18e 100644 --- a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp @@ -17,6 +17,7 @@ #include "kernels/FloorDiv.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(FloorDivTest, FloatSimple) +class FloorDivTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(FloorDivTest, FloatSimple) { Shape x_shape{2, 3}; std::vector<float> x_data{ @@ -47,12 +56,13 @@ TEST(FloorDivTest, FloatSimple) 1, 1, 1, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -60,7 +70,7 @@ TEST(FloorDivTest, FloatSimple) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(FloorDivTest, FloatBroadcast) +TEST_F(FloorDivTest, FloatBroadcast) { Shape x_shape{1, 3}; std::vector<float> x_data{ @@ -81,12 +91,13 @@ TEST(FloorDivTest, FloatBroadcast) 
1, 3, -4, // Row 3 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -94,36 +105,37 @@ TEST(FloorDivTest, FloatBroadcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(FloorDivTest, DivByZero_NEG) +TEST_F(FloorDivTest, DivByZero_NEG) { Shape shape{3}; std::vector<float> x_data{1, 0, -1}; std::vector<float> y_data{0, 0, 0}; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } -TEST(FloorDivTest, Input_Output_Type_Mismatch_NEG) +TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); FloorDiv kernel(&x_tensor, 
&y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(FloorDivTest, Input_Type_Mismatch_NEG) +TEST_F(FloorDivTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp index 48433b42d..cfe8f8bf2 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp @@ -19,6 +19,7 @@ #include "kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/fully_connected.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h> #include <stdexcept> @@ -48,6 +49,12 @@ void FullyConnected::configure() LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32) } + else if (weights()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32) + } else { throw std::runtime_error("Unsupported type."); @@ -77,6 +84,9 @@ void FullyConnected::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S8: + evalQuantizedS8(); + break; case DataType::FLOAT32: evalFloat(); break; @@ -135,5 +145,38 @@ void FullyConnected::evalQuantized() const getTensorShape(output()), getTensorData<uint8_t>(output())); } +void FullyConnected::evalQuantizedS8() const +{ + double 
real_multiplier = 0.0; + int output_shift; + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_multiplier; + real_multiplier = + getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale()); + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + calculateActivationRangeQuantized(params().activation, output(), &output_activation_min, + &output_activation_max); + + int32_t input_offset = -input()->zero_point(); + int32_t filter_offset = -weights()->zero_point(); + int32_t output_offset = output()->zero_point(); + + tflite::FullyConnectedParams op_params{}; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.lhs_cacheable = false; + op_params.rhs_cacheable = false; + tflite::reference_integer_ops::FullyConnected( + op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()), + getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()), + getTensorShape(output()), getTensorData<int8_t>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-interpreter/src/kernels/FullyConnected.h index 204f11ebb..2a7c068c0 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.h +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.h @@ -42,6 +42,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedS8() const; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp index 
0259d3e1d..b0eda0145 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp @@ -16,6 +16,7 @@ #include "kernels/FullyConnected.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,9 +33,13 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<float> input_data, std::initializer_list<float> weights_data, std::initializer_list<float> bias_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FullyConnectedParams params{}; @@ -42,6 +47,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -49,21 +55,63 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int } template <> +void Check<int8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> weights_shape, + std::initializer_list<int32_t> 
bias_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> weights_data, + std::initializer_list<float> bias_data, std::initializer_list<float> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const float quantized_tolerance = getTolerance(-127, 128, 255); + std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64); + std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128); + Tensor input_tensor = + makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second, + weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0, + bias_data, memory_manager.get()); + Tensor output_tensor = + makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + memory_manager->allocate_memory(output_tensor); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <> void Check<uint8_t>( std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape, std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> weights_data, std::initializer_list<float> bias_data, std::initializer_list<float> 
output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); const float quantized_tolerance = getTolerance(-127, 128, 255); std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, - input_quant_param.second, input_data); - Tensor weights_tensor = makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, - input_quant_param.second, weights_data); - Tensor bias_tensor = makeInputTensor<DataType::S32>( - bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second, + weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0, + bias_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); @@ -72,6 +120,7 @@ void Check<uint8_t>( FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -83,7 +132,7 @@ template <typename T> class FullyConnectedTest : public ::testing::Test { }; -using DataTypes = ::testing::Types<float, uint8_t>; +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; TYPED_TEST_CASE(FullyConnectedTest, DataTypes); TYPED_TEST(FullyConnectedTest, Simple) @@ -121,9 +170,13 @@ TEST(FullyConnectedTest, 
InvalidBiasType_NEG) Shape bias_shape{3}; std::vector<int32_t> bias_data{-1, -5, -8}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); - Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FullyConnectedParams params{}; @@ -149,9 +202,14 @@ TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG) Shape bias_shape{3}; std::vector<float> bias_data{-1, -5, -8}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FullyConnectedParams params{}; @@ -180,9 +238,14 @@ TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG) Shape bias_shape{3}; std::vector<float> bias_data{-1, -5, -8}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor 
weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); - Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor weights_tensor = + makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get()); + Tensor bias_tensor = + makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); FullyConnectedParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h index a65d29f5c..877c139c9 100644 --- a/compiler/luci-interpreter/src/kernels/Greater.h +++ b/compiler/luci-interpreter/src/kernels/Greater.h @@ -42,9 +42,9 @@ private: private: int32_t _x_multiplier = 0; - int32_t _x_shift = 0; + int _x_shift = 0; int32_t _y_multiplier = 0; - int32_t _y_shift = 0; + int _y_shift = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp index 3fcc86603..ba3925f17 100644 --- a/compiler/luci-interpreter/src/kernels/Greater.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Greater.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(GreaterTest, FloatSimple) +class GreaterTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GreaterTest, FloatSimple) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -44,19 +53,20 @@ TEST(GreaterTest, FloatSimple) 
true, false, false, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Greater kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(GreaterTest, FloatBroardcast) +TEST_F(GreaterTest, FloatBroardcast) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -74,12 +84,13 @@ TEST(GreaterTest, FloatBroardcast) false, false, true, // Row 3 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Greater kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); @@ -90,7 +101,7 @@ TEST(GreaterTest, FloatBroardcast) const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; -TEST(GreaterTest, Uint8Quantized) +TEST_F(GreaterTest, Uint8Quantized) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -108,21 +119,22 @@ TEST(GreaterTest, Uint8Quantized) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - 
Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Greater kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(GreaterTest, Uint8QuantizedRescale) +TEST_F(GreaterTest, Uint8QuantizedRescale) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -142,21 +154,22 @@ TEST(GreaterTest, Uint8QuantizedRescale) std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Greater kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(GreaterTest, Uint8QuantizedBroadcast) +TEST_F(GreaterTest, Uint8QuantizedBroadcast) { std::vector<float> x_data{ 0.4, -0.8, 0.7, 0.3, // Row 1 @@ -175,34 +188,35 @@ TEST(GreaterTest, Uint8QuantizedBroadcast) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Greater kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(GreaterTest, Input_Type_Mismatch_NEG) +TEST_F(GreaterTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Greater kernel(&x_tensor, &y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(GreaterTest, Input_Output_Type_NEG) +TEST_F(GreaterTest, 
Input_Output_Type_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Greater kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h index e948d698f..4a0f48748 100644 --- a/compiler/luci-interpreter/src/kernels/GreaterEqual.h +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h @@ -42,9 +42,9 @@ private: private: int32_t _x_multiplier = 0; - int32_t _x_shift = 0; + int _x_shift = 0; int32_t _y_multiplier = 0; - int32_t _y_shift = 0; + int _y_shift = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp index 7c79d8abc..a9d172301 100644 --- a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp @@ -17,6 +17,7 @@ #include "kernels/GreaterEqual.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(GreaterEqualTest, FloatSimple) +class GreaterEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(GreaterEqualTest, FloatSimple) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -44,19 +53,20 @@ TEST(GreaterEqualTest, FloatSimple) true, true, false, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); - Tensor y_tensor = 
makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(GreaterEqualTest, FloatBroardcast) +TEST_F(GreaterEqualTest, FloatBroardcast) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -74,12 +84,13 @@ TEST(GreaterEqualTest, FloatBroardcast) false, false, true, // Row 3 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); @@ -90,7 +101,7 @@ TEST(GreaterEqualTest, FloatBroardcast) const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; -TEST(GreaterEqualTest, Uint8Quantized) +TEST_F(GreaterEqualTest, Uint8Quantized) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -108,21 +119,22 @@ TEST(GreaterEqualTest, Uint8Quantized) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, 
quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(GreaterEqualTest, Uint8QuantizedRescale) +TEST_F(GreaterEqualTest, Uint8QuantizedRescale) { std::vector<float> x_data{ 0.5, 0.5, 0.7, 0.9, // Row 1 @@ -142,21 +154,22 @@ TEST(GreaterEqualTest, Uint8QuantizedRescale) std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(GreaterEqualTest, Uint8QuantizedBroadcast) +TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast) { std::vector<float> x_data{ 0.4, -0.8, 0.7, 0.3, // Row 1 @@ -175,34 +188,35 @@ TEST(GreaterEqualTest, Uint8QuantizedBroadcast) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(GreaterEqualTest, Input_Type_Mismatch_NEG) +TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(GreaterEqualTest, 
Input_Output_Type_NEG) +TEST_F(GreaterEqualTest, Input_Output_Type_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/If.cpp b/compiler/luci-interpreter/src/kernels/If.cpp index a267f6267..971708bca 100644 --- a/compiler/luci-interpreter/src/kernels/If.cpp +++ b/compiler/luci-interpreter/src/kernels/If.cpp @@ -68,6 +68,8 @@ void If::execute() const const int32_t num_elements = input(i)->shape().num_elements(); const std::size_t element_size = getDataTypeSize(input(i)->element_type()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(graph_inputs[i]); std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size); } @@ -78,6 +80,8 @@ void If::execute() const { LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type()); output(i)->resize(graph_outputs[i]->shape()); + // TODO: Think about how allocate memory for output in main graph + active_graph->configureAllocations(output(i)); const int32_t num_elements = output(i)->shape().num_elements(); const std::size_t element_size = getDataTypeSize(output(i)->element_type()); diff --git a/compiler/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-interpreter/src/kernels/If.test.cpp index 0dba310d9..c5f4faf75 100644 --- a/compiler/luci-interpreter/src/kernels/If.test.cpp +++ b/compiler/luci-interpreter/src/kernels/If.test.cpp @@ -21,6 +21,8 @@ #include "kernels/Mul.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + namespace 
luci_interpreter { namespace kernels @@ -30,9 +32,17 @@ namespace using namespace testing; -RuntimeGraph *buildAddSubgraph(RuntimeModule *module) +class IfTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) { - RuntimeGraph *graph = module->addGraph(); + RuntimeGraph *graph = module->addGraph(memory_manager); Tensor *input1 = graph->addTensor( std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); Tensor *input2 = graph->addTensor( @@ -40,6 +50,10 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module) Tensor *output = graph->addTensor( std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); + graph->setInputTensors({input1, input2}); graph->setOutputTensors({output}); @@ -50,9 +64,9 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module) return graph; } -RuntimeGraph *buildMulSubgraph(RuntimeModule *module) +RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager) { - RuntimeGraph *graph = module->addGraph(); + RuntimeGraph *graph = module->addGraph(memory_manager); Tensor *input1 = graph->addTensor( std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); Tensor *input2 = graph->addTensor( @@ -60,6 +74,10 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module) Tensor *output = graph->addTensor( std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, "")); + memory_manager->allocate_memory(*input1); + memory_manager->allocate_memory(*input2); + memory_manager->allocate_memory(*output); + graph->setInputTensors({input1, input2}); graph->setOutputTensors({output}); @@ -70,67 +88,69 @@ 
RuntimeGraph *buildMulSubgraph(RuntimeModule *module) return graph; } -TEST(IfTest, CondTrue) +TEST_F(IfTest, CondTrue) { - Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}); - Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); - Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); Tensor output = makeOutputTensor(DataType::FLOAT32); RuntimeModule module(nullptr); - RuntimeGraph *then_graph = buildAddSubgraph(&module); - RuntimeGraph *else_graph = buildMulSubgraph(&module); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); kernel.configure(); + _memory_manager->allocate_memory(output); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9})); } -TEST(IfTest, CondFalse) +TEST_F(IfTest, CondFalse) { - Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}); - Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); - Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); Tensor output = makeOutputTensor(DataType::FLOAT32); RuntimeModule module(nullptr); - RuntimeGraph *then_graph = buildAddSubgraph(&module); - RuntimeGraph *else_graph = buildMulSubgraph(&module); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = 
buildMulSubgraph(&module, _memory_manager.get()); If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); kernel.configure(); + _memory_manager->allocate_memory(output); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14})); } -TEST(IfTest, InvalidCondType_NEG) +TEST_F(IfTest, InvalidCondType_NEG) { - Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}); - Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); - Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); Tensor output = makeOutputTensor(DataType::FLOAT32); RuntimeModule module(nullptr); - RuntimeGraph *then_graph = buildAddSubgraph(&module); - RuntimeGraph *else_graph = buildMulSubgraph(&module); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); EXPECT_ANY_THROW(kernel.configure()); } -TEST(IfTest, InvalidCondElementNum_NEG) +TEST_F(IfTest, InvalidCondElementNum_NEG) { - Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}); - Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); - Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get()); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get()); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get()); Tensor output = makeOutputTensor(DataType::FLOAT32); RuntimeModule module(nullptr); - RuntimeGraph *then_graph = buildAddSubgraph(&module); - 
RuntimeGraph *else_graph = buildMulSubgraph(&module); + RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get()); + RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get()); If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); EXPECT_ANY_THROW(kernel.configure()); diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp index 1d4ccb4cd..04400c3c0 100644 --- a/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp +++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp @@ -15,6 +15,7 @@ */ #include "kernels/InstanceNorm.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -24,11 +25,21 @@ namespace { using namespace testing; -TEST(InstanceNormTest, Simple) + +class InstanceNormTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(InstanceNormTest, Simple) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}); - Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}); - Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); InstanceNormParams params{}; @@ -37,17 +48,19 @@ TEST(InstanceNormTest, Simple) InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); } -TEST(InstanceNormTest, Single_gamma_beta) +TEST_F(InstanceNormTest, Single_gamma_beta) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}); - Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}); - Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); InstanceNormParams params{}; @@ -56,17 +69,19 @@ TEST(InstanceNormTest, Single_gamma_beta) InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2})); } -TEST(InstanceNormTest, Wrong_gamma_beta_dim_NEG) +TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}); - Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}); - Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get()); + Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get()); + Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get()); Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); InstanceNormParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp index 2eaf5404e..64222953f 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp @@ -17,7 +17,7 @@ #include "kernels/L2Normalize.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALL2Normalize.h" #include <stdexcept> @@ -66,9 +66,9 @@ template <typename T> void L2Normalize::eval(int32_t zero_point) const { tflite::L2NormalizationParams op_params{}; op_params.input_zero_point = zero_point; - tflite::optimized_ops::L2Normalization(op_params, getTensorShape(input()), - getTensorData<T>(input()), getTensorShape(output()), - getTensorData<T>(output())); + luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()), + getTensorData<T>(input()), getTensorShape(output()), + getTensorData<T>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp index 6281b451b..1e565e358 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp @@ -16,6 +16,7 @@ */ #include "kernels/L2Normalize.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,7 +31,9 @@ template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); L2NormParams params{}; @@ -38,6 +41,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int L2Normalize kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); @@ -50,12 +54,13 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, std::max(input_data) > 0 ? std::max(input_data) : 0.f); - Tensor input_tensor = - makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 128., 128); L2NormParams params{}; @@ -63,6 +68,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, L2Normalize kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -85,9 +91,11 @@ TYPED_TEST(L2NormalizeTest, Simple) TEST(L2NormalizeTest, ActivationType_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); L2NormParams params{}; @@ -99,9 +107,11 @@ TEST(L2NormalizeTest, ActivationType_NEG) TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; - Tensor input_tensor = makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 64., 127); L2NormParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp index 5bf3ba5a8..5a88808d5 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALL2Pool2D.h" #include <stdexcept> @@ -75,9 +75,9 @@ void L2Pool2D::execute() const op_params.padding_values.width = _padding_width; op_params.float_activation_min = activation_min; op_params.float_activation_max = activation_max; - tflite::optimized_ops::L2Pool(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; default: throw std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp index 52f426a08..289742a50 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp @@ -17,6 +17,7 @@ #include "kernels/L2Pool2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,14 +28,23 @@ namespace using namespace testing; -TEST(L2Pool2DTest, FloatNone) +class L2Pool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(L2Pool2DTest, FloatNone) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ 0, 6, 2, 4, // 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + 
Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -47,6 +57,7 @@ TEST(L2Pool2DTest, FloatNone) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.5}; @@ -54,14 +65,15 @@ TEST(L2Pool2DTest, FloatNone) // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatRelu) +TEST_F(L2Pool2DTest, FloatRelu) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ -1, -6, 2, 4, // -3, -2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -74,6 +86,7 @@ TEST(L2Pool2DTest, FloatRelu) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.53553, 6.5}; @@ -81,14 +94,15 @@ TEST(L2Pool2DTest, FloatRelu) // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatRelu1) +TEST_F(L2Pool2DTest, FloatRelu1) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ -0.1, -0.6, 2, 4, // -0.3, -0.2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -101,6 +115,7 @@ TEST(L2Pool2DTest, FloatRelu1) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.353553, 1.0}; @@ -108,14 +123,15 @@ TEST(L2Pool2DTest, FloatRelu1) // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatRelu6) +TEST_F(L2Pool2DTest, FloatRelu6) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ -0.1, -0.6, 2, 4, // -0.3, -0.2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -128,6 +144,7 @@ TEST(L2Pool2DTest, FloatRelu6) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.353553, 6.0}; @@ -135,14 +152,15 @@ TEST(L2Pool2DTest, FloatRelu6) // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatPaddingSame) +TEST_F(L2Pool2DTest, FloatPaddingSame) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ 0, 6, 2, 4, // 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -155,6 +173,7 @@ TEST(L2Pool2DTest, FloatPaddingSame) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.5}; @@ -162,14 +181,15 @@ TEST(L2Pool2DTest, FloatPaddingSame) // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatPaddingSameStride) +TEST_F(L2Pool2DTest, FloatPaddingSameStride) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ 0, 6, 2, 4, // 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -182,6 +202,7 @@ TEST(L2Pool2DTest, FloatPaddingSameStride) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0}; @@ -189,14 +210,15 @@ TEST(L2Pool2DTest, FloatPaddingSameStride) // TODO make a Shape checking of output_tensor. 
} -TEST(L2Pool2DTest, FloatPaddingValidStride) +TEST_F(L2Pool2DTest, FloatPaddingValidStride) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ 0, 6, 2, 4, // 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -209,6 +231,7 @@ TEST(L2Pool2DTest, FloatPaddingValidStride) L2Pool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5}; @@ -216,14 +239,15 @@ TEST(L2Pool2DTest, FloatPaddingValidStride) // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, InvalidInputShape_NEG) +TEST_F(L2Pool2DTest, InvalidInputShape_NEG) { Shape input_shape{1, 2, 4}; std::vector<float> input_data{ 0, 6, 2, 4, // 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams params{}; @@ -238,14 +262,15 @@ TEST(L2Pool2DTest, InvalidInputShape_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(L2Pool2DTest, InvalidInputOutputType_NEG) +TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG) { Shape input_shape{1, 2, 4}; std::vector<float> input_data{ 0, 6, 2, 4, // 3, 2, 10, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Pool2DParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp 
index f468da5d3..3833a55e8 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp @@ -18,8 +18,9 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h> + +#include "PALLeakyRelu.h" #include <stdexcept> @@ -66,9 +67,8 @@ void LeakyRelu::evalFloat() const { tflite::LeakyReluParams op_params{}; op_params.alpha = params().alpha; - tflite::optimized_ops::LeakyRelu(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); } void LeakyRelu::evalQuantized() const diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp index b5cc3e7fc..6ec8a348a 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp @@ -16,6 +16,7 @@ #include "kernels/LeakyRelu.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,8 +32,10 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<float> input_data, std::initializer_list<float> output_data, float alpha) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); LeakyReluParams 
params{}; @@ -41,6 +44,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int LeakyRelu kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -53,10 +57,11 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data, float alpha) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f); - Tensor input_tensor = - makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); LeakyReluParams params{}; @@ -65,6 +70,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, LeakyRelu kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -99,10 +105,13 @@ TYPED_TEST(LeakReluTest, Simple) TEST(LeakReluTest, IvalidInputOutputType_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, { - 0.0f, 1.0f, 3.0f, // Row 1 - 1.0f, -1.0f, -2.0f, // Row 2 - }); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + memory_manager.get()); Tensor output_tensor = 
makeOutputTensor(DataType::U8); LeakyReluParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h index fe03e10b1..293740e72 100644 --- a/compiler/luci-interpreter/src/kernels/Less.h +++ b/compiler/luci-interpreter/src/kernels/Less.h @@ -42,9 +42,9 @@ private: private: int32_t _x_multiplier = 0; - int32_t _x_shift = 0; + int _x_shift = 0; int32_t _y_multiplier = 0; - int32_t _y_shift = 0; + int _y_shift = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp index 2972bd559..e9d09b288 100644 --- a/compiler/luci-interpreter/src/kernels/Less.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Less.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(LessTest, FloatSimple) +class LessTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessTest, FloatSimple) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -44,19 +53,20 @@ TEST(LessTest, FloatSimple) false, false, true, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Less kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), 
::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(LessTest, FloatBroardcast) +TEST_F(LessTest, FloatBroardcast) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -74,12 +84,13 @@ TEST(LessTest, FloatBroardcast) true, true, false, // Row 3 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Less kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); @@ -90,7 +101,7 @@ TEST(LessTest, FloatBroardcast) const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; -TEST(LessTest, Uint8Quantized) +TEST_F(LessTest, Uint8Quantized) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -108,21 +119,22 @@ TEST(LessTest, Uint8Quantized) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Less kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(LessTest, Uint8QuantizedRescale) +TEST_F(LessTest, Uint8QuantizedRescale) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -142,21 +154,22 @@ TEST(LessTest, Uint8QuantizedRescale) std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Less kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(LessTest, Uint8QuantizedBroadcast) +TEST_F(LessTest, Uint8QuantizedBroadcast) { std::vector<float> x_data{ 0.4, -0.8, 0.7, 0.3, // Row 1 @@ -175,34 +188,35 @@ TEST(LessTest, Uint8QuantizedBroadcast) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - 
makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Less kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(LessTest, Input_Type_Mismatch_NEG) +TEST_F(LessTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Less kernel(&x_tensor, &y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(LessTest, Input_Output_Type_NEG) +TEST_F(LessTest, Input_Output_Type_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Less kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h index ed4b0f1ea..b6da1a2a8 100644 --- 
a/compiler/luci-interpreter/src/kernels/LessEqual.h +++ b/compiler/luci-interpreter/src/kernels/LessEqual.h @@ -42,9 +42,9 @@ private: private: int32_t _x_multiplier = 0; - int32_t _x_shift = 0; + int _x_shift = 0; int32_t _y_multiplier = 0; - int32_t _y_shift = 0; + int _y_shift = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp index db65815a6..0558003dd 100644 --- a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LessEqual.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(LessEqualTest, FloatSimple) +class LessEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LessEqualTest, FloatSimple) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -44,19 +53,20 @@ TEST(LessEqualTest, FloatSimple) false, true, true, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(LessEqualTest, FloatBroardcast) 
+TEST_F(LessEqualTest, FloatBroardcast) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -74,12 +84,13 @@ TEST(LessEqualTest, FloatBroardcast) true, true, false, // Row 3 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); @@ -90,7 +101,7 @@ TEST(LessEqualTest, FloatBroardcast) const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; -TEST(LessEqualTest, Uint8Quantized) +TEST_F(LessEqualTest, Uint8Quantized) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -108,21 +119,22 @@ TEST(LessEqualTest, Uint8Quantized) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), 
::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(LessEqualTest, Uint8QuantizedRescale) +TEST_F(LessEqualTest, Uint8QuantizedRescale) { std::vector<float> x_data{ 0.5, 0.6, 0.7, 0.9, // Row 1 @@ -142,21 +154,22 @@ TEST(LessEqualTest, Uint8QuantizedRescale) std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(LessEqualTest, Uint8QuantizedBroadcast) +TEST_F(LessEqualTest, Uint8QuantizedBroadcast) { std::vector<float> x_data{ 0.4, -0.8, 0.7, 0.3, // Row 1 @@ -175,34 +188,35 @@ TEST(LessEqualTest, Uint8QuantizedBroadcast) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor 
x_tensor = makeInputTensor<DataType::U8>( + {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(LessEqualTest, Input_Type_Mismatch_NEG) +TEST_F(LessEqualTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(LessEqualTest, Input_Output_Type_NEG) +TEST_F(LessEqualTest, Input_Output_Type_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp index fd2ec41a1..a2bf442b0 100644 --- 
a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp +++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALLocalResponseNormalization.h" #include <stdexcept> @@ -52,7 +52,7 @@ void LocalResponseNormalization::execute() const op_params.bias = params().bias; op_params.alpha = params().alpha; op_params.beta = params().beta; - tflite::optimized_ops::LocalResponseNormalization( + luci_interpreter_pal::LocalResponseNormalization( op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()), getTensorData<float>(output())); break; diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp index 6a4331d34..4a9d4739f 100644 --- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LocalResponseNormalization.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,10 +28,18 @@ namespace using namespace testing; -TEST(LocalResponseNormalizationTest, SameAsL2Norm) +class LocalResponseNormalizationTest : public ::testing::Test { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LocalResponseNormalizationTest, SameAsL2Norm) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ 
-41,16 +50,17 @@ TEST(LocalResponseNormalizationTest, SameAsL2Norm) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); } -TEST(LocalResponseNormalizationTest, WithAlpha) +TEST_F(LocalResponseNormalizationTest, WithAlpha) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -61,16 +71,17 @@ TEST(LocalResponseNormalizationTest, WithAlpha) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})); } -TEST(LocalResponseNormalizationTest, WithBias) +TEST_F(LocalResponseNormalizationTest, WithBias) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -81,16 +92,17 @@ TEST(LocalResponseNormalizationTest, WithBias) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})); } -TEST(LocalResponseNormalizationTest, SmallRadius) 
+TEST_F(LocalResponseNormalizationTest, SmallRadius) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -101,16 +113,17 @@ TEST(LocalResponseNormalizationTest, SmallRadius) LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})); } -TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG) +TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LocalResponseNormalizationParams params{}; @@ -123,10 +136,10 @@ TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(LocalResponseNormalizationTest, InvalidInputOutputType_NEG) +TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); LocalResponseNormalizationParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp index 
03d13e4ce..79c315338 100644 --- a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp +++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp @@ -18,9 +18,9 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/log_softmax.h> -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALLogSoftmax.h" namespace luci_interpreter { @@ -41,8 +41,7 @@ void LogSoftmax::configure() params.table = _table; params.beta = 1.0; - - tflite::optimized_ops::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta); + luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta); } output()->resize(input()->shape()); } @@ -76,6 +75,7 @@ void LogSoftmax::evalQuantized() const const auto input_scale = input()->scale(); uint8_t *output_data = getTensorData<uint8_t>(output()); const uint8_t *input_data = getTensorData<uint8_t>(input()); + const float beta = 1.0; tflite::SoftmaxParams params{}; @@ -83,8 +83,9 @@ void LogSoftmax::evalQuantized() const params.zero_point = output()->zero_point(); params.scale = output()->scale(); - tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, - output_data); + luci_interpreter_pal::InitializeParams(&params, input_scale, beta); + luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape, + output_data); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp index 8a90c1dd0..50dcd5c28 100644 --- a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LogSoftmax.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,18 +28,28 @@ namespace using namespace testing; 
-TEST(LogSoftmaxTest, Float) +class LogSoftmaxTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogSoftmaxTest, Float) { Shape input_shape{2, 4}; std::vector<float> input_data{ 0, -6, 2, 4, // 3, -2, 10, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); LogSoftmax kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -48,7 +59,7 @@ TEST(LogSoftmaxTest, Float) EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(LogSoftmaxTest, Uint8) +TEST_F(LogSoftmaxTest, Uint8) { float kMin = -10; float kMax = 10; @@ -58,12 +69,13 @@ TEST(LogSoftmaxTest, Uint8) 0, -6, 2, 4, // 3, -2, 10, 1, // }; - Tensor input_tensor = - makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 16. 
/ 256, 255); LogSoftmax kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -78,28 +90,29 @@ TEST(LogSoftmaxTest, Uint8) ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111})); } -TEST(LogSoftmaxTest, InvalidInputOutputType_NEG) +TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG) { std::vector<float> input_data{ 0, -6, 2, 4, // 3, -2, 10, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 4}, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); LogSoftmax kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(LogSoftmaxTest, InvalidOutputQuantParam_NEG) +TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG) { std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10); std::vector<float> input_data{ 0, -6, 2, 4, // 3, -2, 10, 1, // }; - Tensor input_tensor = - makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 20. 
/ 256, 255); LogSoftmax kernel(&input_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp index 564f191d5..21b7951e0 100644 --- a/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LogicalAnd.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,15 +28,26 @@ namespace using namespace testing; -TEST(LogicalAndTest, Basic) +class LogicalAndTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalAndTest, Basic) { Shape input_shape{1, 1, 1, 4}; - Tensor input_tensor1 = makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}); - Tensor input_tensor2 = makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}); + Tensor input_tensor1 = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), @@ -43,14 +55,17 @@ TEST(LogicalAndTest, Basic) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); } -TEST(LogicalAndTest, Broadcast) +TEST_F(LogicalAndTest, Broadcast) { - Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}); - Tensor input_tensor2 = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}); + Tensor input_tensor1 = 
makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), @@ -58,20 +73,23 @@ TEST(LogicalAndTest, Broadcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); } -TEST(LogicalAndTest, MismatchInputType_NEG) +TEST_F(LogicalAndTest, MismatchInputType_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}); - Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}); + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(LogicalAndTest, InputTypeInvalid_NEG) +TEST_F(LogicalAndTest, InputTypeInvalid_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}); + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp index dccb81102..3cbf27f6b 100644 
--- a/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LogicalNot.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,14 +28,24 @@ namespace using namespace testing; -TEST(LogicalNotTest, Basic) +class LogicalNotTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalNotTest, Basic) { Shape input_shape{1, 1, 1, 4}; - Tensor input_tensor = makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}); + Tensor input_tensor = + makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LogicalNot kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), @@ -42,18 +53,20 @@ TEST(LogicalNotTest, Basic) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); } -TEST(LogicalNotTest, OutputTypeInvalid_NEG) +TEST_F(LogicalNotTest, OutputTypeInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}); + Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); LogicalNot kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(LogicalNotTest, InputTypeInvalid_NEG) +TEST_F(LogicalNotTest, InputTypeInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}); + Tensor input_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); Tensor 
output_tensor = makeOutputTensor(DataType::BOOL); LogicalNot kernel(&input_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp index 7027a2a8b..f289ca64f 100644 --- a/compiler/luci-interpreter/src/kernels/LogicalOr.cpp +++ b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp @@ -20,8 +20,6 @@ #include "kernels/Utils.h" #include "kernels/BinaryOpCommon.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> - namespace luci_interpreter { namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp index 677eac96a..d65a69a5e 100644 --- a/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp @@ -17,6 +17,7 @@ #include "kernels/LogicalOr.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,15 +28,26 @@ namespace using namespace testing; -TEST(LogicalOrTest, Basic) +class LogicalOrTest : public ::testing::Test { - Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}); - Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false}); +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(LogicalOrTest, Basic) +{ + Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false}, + _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorData<bool>(output_tensor), @@ -43,15 +55,18 @@ TEST(LogicalOrTest, Basic) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); } -TEST(LogicalOrTest, Broadcast) +TEST_F(LogicalOrTest, Broadcast) { - Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}); - Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}); + Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true}, + _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), @@ -59,10 +74,12 @@ TEST(LogicalOrTest, Broadcast) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4)); } -TEST(LogicalOrTest, MismatchInputType_NEG) +TEST_F(LogicalOrTest, MismatchInputType_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}); - Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}); + Tensor input1_tensor = + makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); @@ -70,10 +87,11 @@ TEST(LogicalOrTest, MismatchInputType_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(LogicalOrTest, InputTypeInvalid_NEG) +TEST_F(LogicalOrTest, InputTypeInvalid_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}); + Tensor input1_tensor = + 
makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); diff --git a/compiler/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-interpreter/src/kernels/Logistic.cpp index 97d7bf13d..58e4f185d 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/logistic.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp index 41369a417..70227563f 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Logistic.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,11 +31,15 @@ template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<getElementType<T>()>(input_shape, input_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<T>()); Logistic kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); @@ -47,14 +52,18 @@ void 
Check<uint8_t>(std::initializer_list<int32_t> input_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, - input_quant_param.second, input_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0); Logistic kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -107,9 +116,12 @@ TYPED_TEST(LogisticTest, Simple) TEST(LogisticTest, IvalidInputOutputType_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape = {1}; std::vector<float> input_data{10}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 256, 0); Logistic kernel(&input_tensor, &output_tensor); @@ -118,11 +130,13 @@ TEST(LogisticTest, IvalidInputOutputType_NEG) TEST(LogisticTest, IvalidQuantParam_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); Shape input_shape = {2}; std::vector<float> input_data{-10, 10}; std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10); - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, - input_quant_param.second, input_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0); Logistic kernel(&input_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp index b9991f7ec..44f2a222f 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp @@ -16,6 +16,7 @@ #include "kernels/MaxPool2D.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,7 +27,15 @@ namespace using namespace testing; -TEST(MaxPool2DTest, Float) +class MaxPool2DTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MaxPool2DTest, Float) { Shape input_shape{1, 3, 5, 1}; std::vector<float> input_data{ @@ -34,7 +43,8 @@ TEST(MaxPool2DTest, Float) -7, -6, -5, -4, -3, // 5, 4, 3, 6, 7, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pool2DParams 
params{}; @@ -47,6 +57,7 @@ TEST(MaxPool2DTest, Float) MaxPool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -58,15 +69,15 @@ TEST(MaxPool2DTest, Float) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MaxPool2DTest, Uint8) +TEST_F(MaxPool2DTest, Uint8) { std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375); std::vector<float> input_data{ 0, -6, 12, 4, // -3, -2, 10, 7, // }; - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); Pool2DParams params{}; @@ -79,6 +90,7 @@ TEST(MaxPool2DTest, Uint8) MaxPool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.0, 6.0}; @@ -87,7 +99,7 @@ TEST(MaxPool2DTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MaxPool2DTest, SInt16) +TEST_F(MaxPool2DTest, SInt16) { Shape input_shape{1, 3, 5, 1}; std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; @@ -101,7 +113,8 @@ TEST(MaxPool2DTest, SInt16) 5, 6, // }; - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); Pool2DParams params{}; @@ -114,6 +127,7 @@ TEST(MaxPool2DTest, SInt16) MaxPool2D kernel(&input_tensor, &output_tensor, params); kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); diff --git a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp index 2ddaeaf04..e4a505b03 100644 --- a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Maximum.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,34 +28,48 @@ namespace using namespace testing; -TEST(MaximumTest, Float) +class MaximumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MaximumTest, Float) { Shape input_shape{3, 1, 2}; std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; - Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1); - Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2); + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43}; EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(MaximumTest, Uint8) +TEST_F(MaximumTest, Uint8) { Shape input_shape{3, 1, 2}; std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; 
std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; - Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1); - Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2); + Tensor input_tensor1 = + makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<int32_t> ref_output_shape{2, 4}; diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp index 421632812..8e65e0d6d 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.cpp +++ b/compiler/luci-interpreter/src/kernels/Mean.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/reduce.h> #include <stdexcept> @@ -28,7 +28,7 @@ namespace luci_interpreter namespace kernels { -static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params) +static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params) { params->axis_count = num_axes; for (int i = 0; i < num_axes; ++i) @@ -42,7 +42,7 @@ static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams * } // Returns the number of axes that will be reduced. Removes duplicates. 
-static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims) +static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims) { int reduction_count = num_axes; for (int i = 0; i < num_axes; ++i) @@ -63,7 +63,7 @@ static int getAxisReductionCount(const int *axes_data, int num_axes, int input_n return reduction_count; } -static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes, +static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes, bool keep_dims) { int input_num_dims = input_shape.num_dims(); @@ -123,8 +123,10 @@ static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int } } -Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params) - : KernelWithParams<ReducerParams>({input, axes}, {output}, params) +Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params) + : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum}, + params) { } @@ -149,17 +151,28 @@ void Mean::configure() tflite::MeanParams params{}; resolveAxes(axes_data, num_axes, &params); - const bool need_temporaries = !( + _need_temporaries = !( _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1))); - if (need_temporaries) + if (_need_temporaries) { - _temp_index = - std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, ""); - _resolved_axes = - std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, ""); - _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(), - AffineQuantization{}, ""); + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = 
getOutputTensors()[3]; + + temp_index->resize(Shape(input_num_dims)); + resolved_axes->resize(Shape(num_axes)); + temp_sum->resize(output()->shape()); + } + else + { + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + + temp_index->set_allocatable(false); + resolved_axes->set_allocatable(false); + temp_sum->set_allocatable(false); } } @@ -179,12 +192,6 @@ void Mean::execute() const default: throw std::runtime_error("Unsupported type."); } - if (!!_temp_index) - _temp_index->deallocate(); - if (!!_resolved_axes) - _resolved_axes->deallocate(); - if (!!_temp_sum) - _temp_sum->deallocate(); } void Mean::evalFloat() const @@ -197,6 +204,10 @@ void Mean::evalFloat() const tflite::MeanParams params{}; resolveAxes(axes_data, num_axes, &params); + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && ((params.axis[0] == 1 && params.axis[1] == 2) || @@ -207,12 +218,12 @@ void Mean::evalFloat() const } else { - tflite::reference_ops::Mean( - getTensorData<float>(input()), getTensorShape(input()).DimsData(), - input()->shape().num_dims(), getTensorData<float>(output()), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData<int>(_temp_index.get()), - getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get())); + tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData<float>(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<float>(temp_sum)); } } @@ -226,6 +237,10 @@ void Mean::evalQuantized() const tflite::MeanParams params{}; resolveAxes(axes_data, num_axes, &params); + auto temp_index = getOutputTensors()[1]; + auto resolved_axes = getOutputTensors()[2]; + auto temp_sum = getOutputTensors()[3]; + // Defer to specialized implementation for 4D Mean across axes 1 & 2. 
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 && ((params.axis[0] == 1 && params.axis[1] == 2) || @@ -238,12 +253,12 @@ void Mean::evalQuantized() const } else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale()) { - tflite::reference_ops::Mean( - getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(), - input()->shape().num_dims(), getTensorData<uint8_t>(output()), - getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData<int>(_temp_index.get()), - getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get())); + tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(), + input()->shape().num_dims(), getTensorData<uint8_t>(output()), + getTensorShape(output()).DimsData(), output()->shape().num_dims(), + axes_data, num_axes, _params.keep_dims, + getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<int>(temp_sum)); } else { @@ -252,8 +267,8 @@ void Mean::evalQuantized() const getTensorShape(input()).DimsData(), input()->shape().num_dims(), getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(), getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes, - _params.keep_dims, getTensorData<int>(_temp_index.get()), - getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()), + _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), + getTensorData<int>(temp_sum), /*compute_sum=*/false); } } diff --git a/compiler/luci-interpreter/src/kernels/Mean.h b/compiler/luci-interpreter/src/kernels/Mean.h index 1cc046894..ed07ae561 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.h +++ b/compiler/luci-interpreter/src/kernels/Mean.h @@ -30,7 +30,8 @@ namespace kernels class Mean : public KernelWithParams<ReducerParams> { public: - Mean(const Tensor 
*input, const Tensor *axes, Tensor *output, const ReducerParams &params); + Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index, + Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params); const Tensor *input() const { return _inputs[0]; } const Tensor *axes() const { return _inputs[1]; } @@ -45,9 +46,7 @@ private: void evalQuantizedS16() const; private: - std::unique_ptr<Tensor> _temp_index; - std::unique_ptr<Tensor> _resolved_axes; - std::unique_ptr<Tensor> _temp_sum; + bool _need_temporaries = false; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-interpreter/src/kernels/Mean.test.cpp index fa0ba2169..d2c00935a 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mean.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Mean.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,22 +28,39 @@ namespace using namespace testing; -TEST(MeanTest, FloatKeepDims) +class MeanTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MeanTest, FloatKeepDims) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; std::vector<int32_t> axis_data{0, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor 
temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{10.5, 12.5, 14.5}; @@ -51,22 +69,31 @@ TEST(MeanTest, FloatKeepDims) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, FloatKeepDims4DMean) +TEST_F(MeanTest, FloatKeepDims4DMean) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; std::vector<int32_t> axis_data{1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + 
_memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{6, 7, 18, 19}; @@ -75,22 +102,31 @@ TEST(MeanTest, FloatKeepDims4DMean) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, FloatNotKeepDims) +TEST_F(MeanTest, FloatNotKeepDims) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; std::vector<int32_t> axis_data{1, 0, -3, -3}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ReducerParams params{}; params.keep_dims = false; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{12, 13}; @@ -99,23 +135,31 @@ TEST(MeanTest, FloatNotKeepDims) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, Uint8KeepDims) +TEST_F(MeanTest, Uint8KeepDims) { float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); std::vector<float> 
input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<int32_t> axis_data{1}; - Tensor input_tensor = - makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data); + Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, + input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::U8, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.3, 0.35, 0.55}; @@ -125,23 +169,31 @@ TEST(MeanTest, Uint8KeepDims) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, Uint8NotKeepDims) +TEST_F(MeanTest, Uint8NotKeepDims) { float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255); std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6}; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<int32_t> axis_data{1}; - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, 
axis_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); ReducerParams params{}; params.keep_dims = false; - Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.4, 0.4}; @@ -151,7 +203,7 @@ TEST(MeanTest, Uint8NotKeepDims) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(MeanTest, SInt16KeepDims4D) +TEST_F(MeanTest, SInt16KeepDims4D) { std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, @@ -159,15 +211,24 @@ TEST(MeanTest, SInt16KeepDims4D) std::vector<int32_t> axes_data{1, 2}; std::vector<float> ref_output_data{6, 7, 18, 19}; - Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data); - Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get()); + Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get()); + Tensor temp_index(DataType::S32, Shape({}), {}, ""); + Tensor resolved_axes(DataType::S32, Shape({}), {}, ""); + Tensor 
temp_sum(DataType::FLOAT32, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); ReducerParams params{}; params.keep_dims = true; - Mean kernel(&input_tensor, &axes_tensor, &output_tensor, params); + Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum, + params); kernel.configure(); + _memory_manager->allocate_memory(temp_index); + _memory_manager->allocate_memory(resolved_axes); + _memory_manager->allocate_memory(temp_sum); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2})); diff --git a/compiler/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp index b6420dd9b..9a143643f 100644 --- a/compiler/luci-interpreter/src/kernels/Minimum.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Minimum.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,34 +28,48 @@ namespace using namespace testing; -TEST(MinimumTest, Float) +class MinimumTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MinimumTest, Float) { Shape input_shape{3, 1, 2}; std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; - Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1); - Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2); + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get()); Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}; EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(MinimumTest, Uint8) +TEST_F(MinimumTest, Uint8) { Shape input_shape{3, 1, 2}; std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; - Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1); - Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2); + Tensor input_tensor1 = + makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<int32_t> ref_output_shape{2, 4}; diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp index 1139167e0..89049c96c 100644 --- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/pad.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp new file mode 100644 index 000000000..de9da5051 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO: Add tests for MirrorPad diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp index 4e6e3f75a..bc855de0f 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.cpp @@ -20,7 +20,9 @@ #include "kernels/BinaryOpCommon.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALMul.h" + +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> #include <stdexcept> @@ -77,15 +79,15 @@ void Mul::evalFloat() const if (need_broadcast) { - tflite::optimized_ops::BroadcastMul4DSlow( + luci_interpreter_pal::BroadcastMul4DSlow( params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); } else { - tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()), - getTensorShape(input2()), getTensorData<float>(input2()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); } } diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp 
b/compiler/luci-interpreter/src/kernels/Mul.test.cpp index fc7ffb5a1..471f6ac86 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Mul.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(MulTest, Float) +class MulTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(MulTest, Float) { Shape base_shape = {2, 3, 1, 2}; std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; @@ -45,8 +54,10 @@ TEST(MulTest, Float) std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); MulParams params{}; @@ -54,6 +65,7 @@ TEST(MulTest, Float) Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) @@ -62,8 +74,10 @@ TEST(MulTest, Float) // Re-run with exchanged inputs. 
for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); MulParams params{}; @@ -71,6 +85,7 @@ TEST(MulTest, Float) Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) @@ -78,7 +93,7 @@ TEST(MulTest, Float) } } -TEST(MulTest, SInt16) +TEST_F(MulTest, SInt16) { Shape base_shape = {2, 3, 1, 2}; std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; @@ -99,9 +114,10 @@ TEST(MulTest, SInt16) {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}}; for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data); - Tensor input2_tensor = - makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data); + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data, + _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, + input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); const float tolerance = output_tensor.scale() * 2; @@ -110,6 +126,7 @@ TEST(MulTest, SInt16) Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorShape(output_tensor), @@ -121,9 +138,10 @@ TEST(MulTest, SInt16) // Re-run with exchanged inputs and different scales. for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = - makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data); - Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data); + Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, + input2_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data, + _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0); const float tolerance = output_tensor.scale() * 2; @@ -132,6 +150,7 @@ TEST(MulTest, SInt16) Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-interpreter/src/kernels/Neg.cpp index 99f4d4a21..c6fe08a9e 100644 --- a/compiler/luci-interpreter/src/kernels/Neg.cpp +++ b/compiler/luci-interpreter/src/kernels/Neg.cpp @@ -17,7 +17,7 @@ #include "kernels/Neg.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALNeg.h" #include <stdexcept> @@ -50,8 +50,8 @@ void Neg::execute() const void Neg::evalFloat() const { - tflite::reference_ops::Negate(getTensorShape(input()), getTensorData<float>(input()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-interpreter/src/kernels/Neg.test.cpp index 33256e1c6..8b2bc1a82 100644 --- 
a/compiler/luci-interpreter/src/kernels/Neg.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Neg.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Neg.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,13 +32,16 @@ template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, std::initializer_list<T> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); Neg kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h index d729c6c14..247874df7 100644 --- a/compiler/luci-interpreter/src/kernels/NotEqual.h +++ b/compiler/luci-interpreter/src/kernels/NotEqual.h @@ -42,9 +42,9 @@ private: private: int32_t _x_multiplier = 0; - int32_t _x_shift = 0; + int _x_shift = 0; int32_t _y_multiplier = 0; - int32_t _y_shift = 0; + int _y_shift = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp index f9dc7781b..763f86893 100644 --- a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp +++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp @@ -17,6 +17,7 @@ #include "kernels/NotEqual.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 
+28,15 @@ namespace using namespace testing; -TEST(NotEqualTest, FloatSimple) +class NotEqualTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(NotEqualTest, FloatSimple) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -44,19 +53,20 @@ TEST(NotEqualTest, FloatSimple) true, false, true, // Row 2 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(NotEqualTest, FloatBroardcast) +TEST_F(NotEqualTest, FloatBroardcast) { std::vector<float> x_data{ 0.5, 0.7, 0.9, // Row 1 @@ -76,12 +86,13 @@ TEST(NotEqualTest, FloatBroardcast) false, false, false, // Row 4 }; - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<bool>(output_tensor), 
::testing::ElementsAreArray(ref_output_data)); @@ -92,7 +103,7 @@ TEST(NotEqualTest, FloatBroardcast) const float F_MIN = -128.0 / 128.0; const float F_MAX = 127.0 / 128.0; -TEST(NotEqualTest, Uint8Quantized) +TEST_F(NotEqualTest, Uint8Quantized) { std::vector<float> x_data{ 0.5, 0.5, 0.7, 0.9, // Row 1 @@ -110,24 +121,25 @@ TEST(NotEqualTest, Uint8Quantized) }; std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data); + Tensor x_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get()); std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(NotEqualTest, Uint8QuantizedBroadcast) +TEST_F(NotEqualTest, Uint8QuantizedBroadcast) { std::vector<float> x_data{ 0.4, -0.8, 0.7, 0.3, // Row 1 @@ -148,34 +160,35 @@ TEST(NotEqualTest, Uint8QuantizedBroadcast) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); - Tensor x_tensor = - makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data); - Tensor y_tensor = - makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor 
x_tensor = makeInputTensor<DataType::U8>( + {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>( + {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); } -TEST(NotEqualTest, Input_Type_Mismatch_NEG) +TEST_F(NotEqualTest, Input_Type_Mismatch_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(NotEqualTest, Input_Output_Type_NEG) +TEST_F(NotEqualTest, Input_Output_Type_NEG) { - Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-interpreter/src/kernels/PRelu.cpp index a53ac6f80..5a6b05c3a 100644 --- a/compiler/luci-interpreter/src/kernels/PRelu.cpp +++ b/compiler/luci-interpreter/src/kernels/PRelu.cpp @@ -19,7 
+19,8 @@ #include "kernels/BinaryOpCommon.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <tensorflow/lite/kernels/internal/reference/prelu.h> #include <stdexcept> @@ -168,10 +169,11 @@ static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val, constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max(); const int32_t output_val = - input_val >= 0 ? tflite::MultiplyByQuantizedMultiplier(input_val, identity_mult.multiplier, - identity_mult.shift) - : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val, - alpha_mult.multiplier, alpha_mult.shift); + input_val >= 0 + ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val), + identity_mult.multiplier, identity_mult.shift) + : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val), + alpha_mult.multiplier, alpha_mult.shift); const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val)); return clamped_output; } diff --git a/compiler/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-interpreter/src/kernels/PRelu.h index e85c3f7e9..f7735d418 100644 --- a/compiler/luci-interpreter/src/kernels/PRelu.h +++ b/compiler/luci-interpreter/src/kernels/PRelu.h @@ -50,7 +50,7 @@ private: std::vector<ChannelQuantMultipliers> _alpha_multipliers; // TODO merge this into one ChannelQuantMultiplier object int32_t _output_multiplier_identity = 0; - int32_t _output_shift_identity = 0; + int _output_shift_identity = 0; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp index 3dbc51cc1..6d97382de 100644 --- a/compiler/luci-interpreter/src/kernels/PRelu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp @@ -17,6 +17,7 @@ #include "kernels/PRelu.h" #include "kernels/TestUtils.h" +#include 
"luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,14 +33,18 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, std::initializer_list<T> alpha_data, std::initializer_list<T> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); - Tensor alpha_tensor = makeInputTensor<element_type>(alpha_shape, alpha_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -97,6 +102,7 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; } TEST(PReluTest, Uint8Simple) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f}; std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f}; std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f}; @@ -104,14 +110,15 @@ TEST(PReluTest, Uint8Simple) float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data); - Tensor alpha_tensor = - makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data); + Tensor 
input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -147,14 +154,16 @@ TEST(PReluTest, Uint8Broadcast) const float kMax = 127.f / 128.f; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first, quant_param.second, input_data); - Tensor alpha_tensor = - makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>( + {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -166,12 +175,15 @@ TEST(PReluTest, Uint8Broadcast) TEST(PReluTest, SInt16_LWQ_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); // Rewrite this test in case layer-wise quantization for sint16 is supported std::vector<float> input_data(6); // data is not important std::vector<float> alpha_data(6); - 
Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data); - Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); @@ -180,18 +192,22 @@ TEST(PReluTest, SInt16_LWQ_NEG) TEST(PReluTest, SInt16_CWQ_Simple) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; std::vector<float> alpha_data{0.5f, 0.25f}; std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; std::vector<float> alpha_scales{0.05f, 0.025f}; std::vector<int32_t> zerop{0, 0}; - Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data); - Tensor alpha_tensor = makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); @@ -200,14 +216,16 @@ TEST(PReluTest, SInt16_CWQ_Simple) TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data(6); // data is not important std::vector<float> alpha_data(6); 
std::vector<float> alpha_scales{0.25f, 0.05f}; std::vector<int32_t> zerop{0, 0}; - Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data); - Tensor alpha_tensor = - makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, alpha_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); @@ -216,14 +234,16 @@ TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG) TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data(6); // data is not important std::vector<float> alpha_data(6); std::vector<float> alpha_scales{0.25f}; std::vector<int32_t> zerop{0}; - Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data); - Tensor alpha_tensor = - makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, alpha_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, + alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); @@ -232,19 +252,22 @@ TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG) TEST(PReluTest, SInt16_CWQ_uneven_shape1) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f}; std::vector<float> alpha_data{0.5f, 0.25f}; std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f}; std::vector<float> 
alpha_scales{0.05f, 0.025f}; std::vector<int32_t> zerop{0, 0}; - Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data); - Tensor alpha_tensor = - makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, alpha_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, + alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2})); @@ -253,6 +276,7 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape1) TEST(PReluTest, SInt16_CWQ_uneven_shape2) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{ 0.0f, 0.0f, 0.0f, // Row 1, Column 1 0.5f, 0.5f, 0.5f, // Row 1, Column 2 @@ -269,13 +293,15 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape2) std::vector<float> alpha_scales{1.f, 0.05f, 0.1f}; std::vector<int32_t> zerop{0, 0, 0}; - Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data); - Tensor alpha_tensor = - makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, alpha_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, + alpha_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 
3})); @@ -284,8 +310,9 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape2) TEST(PReluTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); @@ -294,8 +321,9 @@ TEST(PReluTest, Input_Output_Type_NEG) TEST(PReluTest, Input_Alpha_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); @@ -304,23 +332,29 @@ TEST(PReluTest, Input_Alpha_Type_NEG) TEST(PReluTest, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); - Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); + Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } 
TEST(PReluTest, Input_Output_U8_CWQ_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> scales{1.f, 1.f}; std::vector<int32_t> zerop{0, 0}; std::vector<float> dummy_data(4, 0.f); - Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data); - Tensor alpha_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data); - Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); @@ -328,12 +362,16 @@ TEST(PReluTest, Input_Output_U8_CWQ_NEG) TEST(PReluTest, Input_Output_S16_CWQ_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> scales{1.f, 1.f}; std::vector<int32_t> zerop{0, 0}; std::vector<float> dummy_data(4, 0.f); - Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data); - Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data); - Tensor output_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get()); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); 
EXPECT_ANY_THROW(kernel.configure()); @@ -341,10 +379,14 @@ TEST(PReluTest, Input_Output_S16_CWQ_NEG) TEST(PReluTest, Mixing_U8_S16_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> dummy_data(4, 0.f); - Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data); - Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data); - Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor alpha_tensor = + makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); + Tensor output_tensor = + makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get()); PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp index 092bd449a..90a0f894e 100644 --- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Pack.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,6 +32,7 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes, std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas, std::initializer_list<T> output_data, int32_t axis) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); std::vector<const Tensor *> inputs(input_datas.size()); std::vector<Tensor> tmp_inputs; @@ -39,11 +41,13 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes, if (std::is_same<T, float>::value) { tmp_inputs.push_back(Tensor(element_type, 
input_shapes[i], {}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); } else { tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, "")); + memory_manager->allocate_memory(tmp_inputs[i]); tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T)); } } @@ -64,6 +68,7 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes, Pack kernel(inputs, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -103,12 +108,13 @@ TYPED_TEST(PackTest, NegAxis) TEST(Pack, MismatchingInputValuesCount_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input1_data{1, 4}; std::vector<float> input2_data{2, 5}; std::vector<float> input3_data{3, 6}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data); - Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data); + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); PackParams params{}; { @@ -122,12 +128,13 @@ TEST(Pack, MismatchingInputValuesCount_NEG) TEST(Pack, InvalidInputAxis_NEG) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input1_data{1, 4}; std::vector<float> input2_data{2, 5}; std::vector<float> input3_data{3, 6}; - Tensor input1_tensor = 
makeInputTensor<DataType::FLOAT32>({2}, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data); - Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data); + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get()); + Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); PackParams params{}; { diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp index 3e76080a9..700448e7a 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/pad.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp index 75b2e560e..7994263e2 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Pad.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,17 +31,20 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; } TEST(Pad, Uint8) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 3, 1}, 
quant_param.first, quant_param.second, input_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, @@ -52,14 +56,18 @@ TEST(Pad, Uint8) TEST(Pad, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{1, 2, 3, 4, 5, 6}; std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp index 3c215dbca..e90469239 100644 --- a/compiler/luci-interpreter/src/kernels/PadV2.cpp +++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include 
<tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/pad.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp index 1ee741401..41efaff06 100644 --- a/compiler/luci-interpreter/src/kernels/PadV2.test.cpp +++ b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp @@ -16,6 +16,7 @@ #include "kernels/PadV2.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,20 +31,23 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; } TEST(PadV2, Uint8) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; std::vector<float> constant_values_data{0.5}; - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); - Tensor constant_values = - makeInputTensor<DataType::U8>({1}, quant_param.first, quant_param.second, constant_values_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = makeInputTensor<DataType::U8>( + {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); 
kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data = { @@ -56,16 +60,21 @@ TEST(PadV2, Uint8) TEST(PadV2, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); std::vector<float> input_data{1, 2, 3, 4, 5, 6}; std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0}; std::vector<float> constant_values_data{7}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); - Tensor constant_values = makeInputTensor<DataType::FLOAT32>({1}, constant_values_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get()); + Tensor constant_values = + makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, diff --git a/compiler/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-interpreter/src/kernels/Pow.test.cpp index a414440c9..0e858115d 100644 --- a/compiler/luci-interpreter/src/kernels/Pow.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pow.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Pow.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,7 +27,15 @@ namespace using namespace testing; -TEST(PowTest, SimplePow) +class PowTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + 
std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(PowTest, SimplePow) { std::initializer_list<int32_t> base_shape = {1, 1, 3, 2}; @@ -34,19 +43,22 @@ TEST(PowTest, SimplePow) std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); } -TEST(PowTest, FloatBroadcastPow) +TEST_F(PowTest, FloatBroadcastPow) { std::initializer_list<int32_t> input1_shape = {1, 3}; std::initializer_list<int32_t> input2_shape = {3, 1}; @@ -56,60 +68,66 @@ TEST(PowTest, FloatBroadcastPow) std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f, 0.96888f, 0.6178f, 1.3953f, 0.9587f}; - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); 
kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); } -TEST(PowTest, IntPow) +TEST_F(PowTest, IntPow) { std::initializer_list<int32_t> base_shape = {1, 3}; std::vector<int32_t> input_data{2, 3, 4}; std::vector<int32_t> test_outputs{4, 27, 256}; - Tensor input1_tensor = makeInputTensor<DataType::S32>(base_shape, input_data); - Tensor input2_tensor = makeInputTensor<DataType::S32>(base_shape, input_data); + Tensor input1_tensor = + makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); } -TEST(PowTest, Input_Output_Type_NEG) +TEST_F(PowTest, Input_Output_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}); + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::BOOL); Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(PowTest, Input_Type_Mismatch_NEG) +TEST_F(PowTest, Input_Type_Mismatch_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}); + Tensor input1_tensor = 
makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(PowTest, Invalid_Input_Type_NEG) +TEST_F(PowTest, Invalid_Input_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); - Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-interpreter/src/kernels/Relu.cpp index b5acf1d60..747ec6cc8 100644 --- a/compiler/luci-interpreter/src/kernels/Relu.cpp +++ b/compiler/luci-interpreter/src/kernels/Relu.cpp @@ -17,7 +17,7 @@ #include "kernels/Relu.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALRelu.h" #include <stdexcept> @@ -70,7 +70,7 @@ void Relu::evalFloat() const auto output_data = getTensorData<float>(output()); auto output_shape = getTensorShape(output()); - tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data); + luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data); } void Relu::evalQuantized() const @@ -85,8 +85,8 @@ void Relu::evalQuantized() const std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max()); - tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), - getTensorShape(output()), 
getTensorData<uint8_t>(output())); + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); } void Relu::evalQuantizedS16() const diff --git a/compiler/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-interpreter/src/kernels/Relu.test.cpp index 6623a5b77..bd32e3cc9 100644 --- a/compiler/luci-interpreter/src/kernels/Relu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Relu.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Relu.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(ReluTest, FloatSimple) +class ReluTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(ReluTest, FloatSimple) { std::vector<float> input_data{ 0.0f, 1.0f, 3.0f, // Row 1 @@ -39,11 +48,13 @@ TEST(ReluTest, FloatSimple) 1.0f, 0.0f, 0.0f, // Row 2 }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Relu kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -51,7 +62,7 @@ TEST(ReluTest, FloatSimple) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(ReluTest, Uint8Quantized) +TEST_F(ReluTest, Uint8Quantized) { std::vector<float> input_data{ 0, -6, 2, 4, // @@ -62,12 +73,13 @@ TEST(ReluTest, Uint8Quantized) const float f_max = (127.0 / 128.0) * 8; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); - Tensor input_tensor = - 
makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); Relu kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); @@ -76,7 +88,7 @@ TEST(ReluTest, Uint8Quantized) EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); } -TEST(ReluTest, Uint8Requantized) +TEST_F(ReluTest, Uint8Requantized) { std::vector<float> input_data{ 0, -6, 2, 4, // @@ -90,14 +102,15 @@ TEST(ReluTest, Uint8Requantized) const float out_max = (255.0 / 256.0) * 8; std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); Relu kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); @@ -106,7 +119,7 @@ TEST(ReluTest, Uint8Requantized) EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); } -TEST(ReluTest, SInt16) +TEST_F(ReluTest, SInt16) { std::vector<float> input_data{ 0, -6, 2, 4, // @@ -117,33 +130,36 @@ TEST(ReluTest, SInt16) 3, 0, 7, 1, // }; - Tensor 
input_tensor = makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0); Relu kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(ReluTest, Input_Output_Type_NEG) +TEST_F(ReluTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Relu kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(ReluTest, Invalid_Input_Type_NEG) +TEST_F(ReluTest, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Relu kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-interpreter/src/kernels/Relu6.cpp index fa7aa504a..07205ed3a 100644 --- a/compiler/luci-interpreter/src/kernels/Relu6.cpp +++ b/compiler/luci-interpreter/src/kernels/Relu6.cpp @@ -17,7 +17,7 @@ #include "kernels/Relu6.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALRelu6.h" #include <stdexcept> @@ -63,7 +63,7 @@ void Relu6::evalFloat() const auto output_data = getTensorData<float>(output()); auto output_shape = getTensorShape(output()); - 
tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data); + luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data); } void Relu6::evalQuantized() const @@ -80,8 +80,8 @@ void Relu6::evalQuantized() const std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()), params.output_offset + static_cast<int32>(roundf(6.f / output()->scale()))); - tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), - getTensorShape(output()), getTensorData<uint8_t>(output())); + luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp index fe991389a..af7b3f3db 100644 --- a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Relu6.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,7 +28,15 @@ namespace using namespace testing; -TEST(Relu6Test, FloatSimple) +class Relu6Test : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(Relu6Test, FloatSimple) { std::vector<float> input_data{ 0.0f, 1.0f, 3.0f, // Row 1 @@ -39,11 +48,13 @@ TEST(Relu6Test, FloatSimple) 6.0f, 0.0f, 0.0f, // Row 2 }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Relu6 kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); 
kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), @@ -51,7 +62,7 @@ TEST(Relu6Test, FloatSimple) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); } -TEST(Relu6Test, Uint8Quantized) +TEST_F(Relu6Test, Uint8Quantized) { // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. const float f_min = (-128.0 / 128.0) * 10; @@ -64,12 +75,13 @@ TEST(Relu6Test, Uint8Quantized) }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); Relu6 kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); @@ -79,7 +91,7 @@ TEST(Relu6Test, Uint8Quantized) FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); } -TEST(Relu6Test, Uint8Requantized) +TEST_F(Relu6Test, Uint8Requantized) { // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
const float in_min = (-128.0 / 128.0) * 10; @@ -94,14 +106,15 @@ TEST(Relu6Test, Uint8Requantized) }; std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get()); std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); Relu6 kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); @@ -111,22 +124,23 @@ TEST(Relu6Test, Uint8Requantized) FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); } -TEST(Relu6Test, Input_Output_Type_NEG) +TEST_F(Relu6Test, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Relu6 kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(Relu6Test, Invalid_Input_Type_NEG) +TEST_F(Relu6Test, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Relu6 kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp index 38159380f..c2ff3ea1b 100644 --- 
a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Reshape.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,37 +27,51 @@ namespace using namespace testing; +class ReshapeTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + // TODO Test types other than FLOAT32. -TEST(ReshapeTest, Regular) +TEST_F(ReshapeTest, Regular) { Shape input_shape{1, 2, 2, 3}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape shape_shape{2}; std::vector<int32_t> shape_data{3, 4}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor shape_tensor = + makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); } -TEST(ReshapeTest, UnknownDimension) +TEST_F(ReshapeTest, UnknownDimension) { Shape input_shape{2, 1, 2, 3}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape shape_shape{3}; std::vector<int32_t> shape_data{2, -1, 2}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); + Tensor 
shape_tensor = + makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Reshape kernel(&input_tensor, &shape_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp index 0e9bcc920..e2ddd6a7b 100644 --- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALResizeBilinear.h" namespace luci_interpreter { @@ -56,12 +56,12 @@ void ResizeBilinear::execute() const switch (output()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::ResizeBilinear( + luci_interpreter_pal::ResizeBilinear( op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::ResizeBilinear( + luci_interpreter_pal::ResizeBilinear( op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output())); break; diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp index 68ef6e6c1..7af20f8c4 100644 --- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp @@ -17,6 +17,7 @@ #include "kernels/ResizeBilinear.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace 
luci_interpreter { @@ -33,8 +34,10 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, bool align_corners, bool half_pixel_centers) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeBilinearParams params{}; @@ -43,6 +46,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -60,8 +64,11 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, { // On TFlite example use Uint8 value it self, so this means quant param scale 1.0f and zero // point 0. 
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data); - Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0); ResizeBilinearParams params{}; @@ -70,6 +77,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -152,13 +160,17 @@ TEST(ResizeBilinearTest, HalfPixelCenterUint8Test) TEST(ResizeBilinearTest, InputShapeInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeBilinearParams params{}; @@ -171,13 +183,17 @@ TEST(ResizeBilinearTest, InputShapeInvalid_NEG) TEST(ResizeBilinearTest, SizeShapeInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}); + std::unique_ptr<IMemoryManager> 
memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeBilinearParams params{}; @@ -190,13 +206,17 @@ TEST(ResizeBilinearTest, SizeShapeInvalid_NEG) TEST(ResizeBilinearTest, SizeDimInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeBilinearParams params{}; @@ -209,13 +229,17 @@ TEST(ResizeBilinearTest, SizeDimInvalid_NEG) TEST(ResizeBilinearTest, InvalidParams_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeBilinearParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp 
b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp index c52264997..306cefbc2 100644 --- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp @@ -19,8 +19,8 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h> +#include "PALResizeNearestNeighbor.h" namespace luci_interpreter { @@ -61,7 +61,7 @@ void ResizeNearestNeighbor::execute() const getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output())); break; case DataType::U8: - tflite::optimized_ops::ResizeNearestNeighbor( + luci_interpreter_pal::ResizeNearestNeighbor( op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output())); break; diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp index 0b36a29af..0e9017c78 100644 --- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp @@ -17,6 +17,7 @@ #include "kernels/ResizeNearestNeighbor.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -33,8 +34,11 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, bool align_corners, bool half_pixel_centers) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + std::unique_ptr<IMemoryManager> memory_manager = 
std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeNearestNeighborParams params{}; @@ -43,6 +47,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -58,12 +63,14 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, std::initializer_list<float> output_data, bool align_corners, bool half_pixel_centers) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, std::max(input_data) > 0 ? 
std::max(input_data) : 0.f); - Tensor input_tensor = - makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data); - Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.first); ResizeNearestNeighborParams params{}; @@ -72,6 +79,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape, ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -151,13 +159,17 @@ TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest) TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeNearestNeighborParams params{}; @@ -170,13 +182,17 @@ TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG) TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = 
makeInputTensor<DataType::S32>({2, 1}, {3, 3}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeNearestNeighborParams params{}; @@ -189,13 +205,17 @@ TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG) TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { - 3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }); - Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); ResizeNearestNeighborParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp index 6e1e6c03c..2bd94875b 100644 --- a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp @@ -17,6 +17,7 @@ #include "kernels/ReverseV2.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -36,6 +37,8 @@ TYPED_TEST_CASE(ReverseV2Test, DataTypes); TYPED_TEST(ReverseV2Test, MultiDimensions) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + // TypeParam std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 
17, 18, 19, 20, 21, 22, 23, 24}; @@ -47,13 +50,15 @@ TYPED_TEST(ReverseV2Test, MultiDimensions) 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20}; std::vector<int32_t> output_shape{4, 3, 2}; - Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data); - Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data); + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp index b93a04ddd..3c6494232 100644 --- a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Rsqrt.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,11 +30,15 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Rsqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); 
kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); @@ -58,7 +63,9 @@ TEST(RsqrtTest, SimpleRsqrt) TEST(RsqrtTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); Rsqrt kernel(&input_tensor, &output_tensor); @@ -67,11 +74,14 @@ TEST(RsqrtTest, Input_Output_Type_NEG) TEST(RsqrtTest, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Rsqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-interpreter/src/kernels/Slice.cpp index 626521815..37a834a18 100644 --- a/compiler/luci-interpreter/src/kernels/Slice.cpp +++ b/compiler/luci-interpreter/src/kernels/Slice.cpp @@ -16,7 +16,7 @@ #include "kernels/Slice.h" #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSlice.h" #include <cassert> #include <cstring> @@ -131,14 +131,13 @@ void Slice::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::Slice(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); break; case 
DataType::U8: - tflite::optimized_ops::Slice(op_params, getTensorShape(input()), - getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + luci_interpreter_pal::Slice(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); break; default: throw std::runtime_error("Unsupported input type."); diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp index a360a29cc..3e0d0b0d7 100644 --- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Slice.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -35,6 +36,8 @@ TYPED_TEST_CASE(SliceTest, DataTypes); TYPED_TEST(SliceTest, SimpleTest) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}; Shape input_shape{3, 2, 3, 1}; std::vector<int32_t> begin_data{1, 0, 0, 0}; @@ -44,14 +47,17 @@ TYPED_TEST(SliceTest, SimpleTest) std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5}; std::vector<int32_t> output_shape{2, 1, 3, 1}; - Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data); - Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data); - Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor input_tensor = + makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); Slice 
kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-interpreter/src/kernels/Softmax.cpp index 8e29f53ee..c230aaa70 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/softmax.h> -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSoftmax.h" #include <stdexcept> @@ -40,10 +40,12 @@ void Softmax::configure() LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1); if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) { - LUCI_INTERPRETER_CHECK(output()->zero_point() == 0); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 || + output()->zero_point() == std::numeric_limits<int8_t>::min()); tflite::SoftmaxParams op_params{}; op_params.table = _table; - tflite::optimized_ops::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta); + luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta); } output()->resize(input()->shape()); } @@ -81,9 +83,9 @@ template <typename T> void Softmax::evalQuantized() const op_params.table = const_cast<float *>(_table); op_params.zero_point = output()->zero_point(); op_params.scale = output()->scale(); - - tflite::optimized_ops::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()), - getTensorShape(output()), getTensorData<T>(output())); + luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta); + luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), 
getTensorData<T>(input()), + getTensorShape(output()), getTensorData<T>(output())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp index c69a2f9cc..9de40b6ec 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Softmax.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -26,46 +27,60 @@ namespace using namespace testing; -template <typename T> +template <typename T> constexpr loco::DataType toLocoDataType(); + +template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; } + +template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; } + +template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; } + +template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get()); + Tensor output_tensor = makeOutputTensor(toLocoDataType<T>()); SoftmaxParams params{}; params.beta = 0.1; Softmax kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), output_shape); 
} -template <> -void Check<uint8_t>(std::initializer_list<int32_t> input_shape, - std::initializer_list<int32_t> output_shape, - std::initializer_list<float> input_data, - std::initializer_list<float> output_data) +template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::pair<float, int32_t> input_quant_param = - quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f), - std::max<float>(std::max<float>(input_data), 0.f)); + quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f), + std::max<float>(std::max<float>(input_data), 0.f)); std::pair<float, int32_t> output_quant_param = - quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f), - std::max<float>(std::max<float>(output_data), 0.f)); - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, - input_quant_param.second, input_data); + quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f), + std::max<float>(std::max<float>(output_data), 0.f)); + Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first, + input_quant_param.second, input_data, + memory_manager.get()); Tensor output_tensor = - makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second); SoftmaxParams params{}; params.beta = 0.1; Softmax kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); @@ -77,7 +92,7 @@ 
template <typename T> class SoftmaxTest : public ::testing::Test { }; -using DataTypes = ::testing::Types<float, uint8_t>; +using DataTypes = ::testing::Types<float, uint8_t, int8_t>; TYPED_TEST_CASE(SoftmaxTest, DataTypes); TYPED_TEST(SoftmaxTest, Simple) diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp index 2f6a47925..630cd38c4 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp @@ -18,7 +18,7 @@ #include "kernels/SpaceToBatchND.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSpaceToBatchND.h" #include <stdexcept> @@ -80,7 +80,7 @@ void SpaceToBatchND::execute() const tflite::SpaceToBatchParams op_params; case DataType::FLOAT32: op_params.output_offset = 0; - tflite::optimized_ops::SpaceToBatchND( + luci_interpreter_pal::SpaceToBatchND( op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()), @@ -88,7 +88,7 @@ void SpaceToBatchND::execute() const break; case DataType::U8: op_params.output_offset = output()->zero_point(); - tflite::optimized_ops::SpaceToBatchND( + luci_interpreter_pal::SpaceToBatchND( op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()), getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()), diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp index a6ec6f23f..e06501c8c 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp @@ -16,6 +16,7 @@ #include 
"kernels/SpaceToBatchND.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -34,14 +35,19 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_data, std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); - Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -55,17 +61,23 @@ void Check<uint8_t>( std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data, std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, - input_quant_param.second, input_data); - 
Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data); - Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second, + input_data, memory_manager.get()); + Tensor block_shape_tensor = + makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second); SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -93,10 +105,13 @@ TYPED_TEST(SpaceToBatchNDTest, Simple) TEST(SpaceToBatchNDTest, Invalid_Shape_NEG) { - Tensor input_tensor = - makeInputTensor<DataType::FLOAT32>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}); - Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}); - Tensor paddings_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get()); + Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get()); + Tensor paddings_tensor = + makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp index 
fc999372a..7c29e8cb0 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp @@ -16,7 +16,7 @@ #include "SpaceToDepth.h" #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSpaceToDepth.h" namespace luci_interpreter { @@ -61,14 +61,14 @@ void SpaceToDepth::execute() const switch (input()->element_type()) { case DataType::FLOAT32: - tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()), - getTensorData<float>(input()), getTensorShape(output()), - getTensorData<float>(output())); + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData<float>(input()), getTensorShape(output()), + getTensorData<float>(output())); break; case DataType::U8: - tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()), - getTensorData<uint8_t>(input()), getTensorShape(output()), - getTensorData<uint8_t>(output())); + luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(output()), + getTensorData<uint8_t>(output())); break; default: throw std::runtime_error("Unsupported type."); diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp index 77b6655dc..735c010b9 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp @@ -16,6 +16,7 @@ #include "kernels/SpaceToDepth.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -35,10 +36,13 @@ TYPED_TEST_CASE(SpaceToDepthTest, DataTypes); TYPED_TEST(SpaceToDepthTest, SimpleCase) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<TypeParam>(); std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 
3, 4, 8}; Shape input_shape{1, 2, 2, 2}; - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8}; std::vector<int32_t> output_shape{1, 1, 1, 8}; Tensor output_tensor = makeOutputTensor(element_type); @@ -48,6 +52,7 @@ TYPED_TEST(SpaceToDepthTest, SimpleCase) SpaceToDepth kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<TypeParam>(output_tensor), diff --git a/compiler/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-interpreter/src/kernels/Split.cpp index 0da0f3779..1a563f307 100644 --- a/compiler/luci-interpreter/src/kernels/Split.cpp +++ b/compiler/luci-interpreter/src/kernels/Split.cpp @@ -18,7 +18,7 @@ #include "Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> +#include "PALSplit.h" namespace luci_interpreter { @@ -56,11 +56,11 @@ void Split::execute() const params.num_split = _outputs.size(); params.axis = _axis_value; -#define TF_LITE_SPLIT(scalar) \ - { \ - VectorOfTensors<scalar, false> all_outputs(_outputs); \ - tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ - all_outputs.shapes(), all_outputs.data()); \ +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors<scalar, false> all_outputs(_outputs); \ + luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ + all_outputs.shapes(), all_outputs.data()); \ } switch (input()->element_type()) diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp index c558928e8..74d57aed3 100644 --- a/compiler/luci-interpreter/src/kernels/Split.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp @@ -17,6 +17,7 @@ #include 
"kernels/Split.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,9 +33,12 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, std::vector<std::vector<T>> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); - Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get()); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); std::vector<Tensor> output_tensors; output_tensors.reserve(num_splits); @@ -51,6 +55,10 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape, Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs)); kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } kernel.execute(); for (int i = 0; i < num_splits; ++i) diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp new file mode 100644 index 000000000..281988272 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SplitV.h" + +#include "Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector<Tensor *> outputs) + : Kernel({input, size_splits, axis}, std::move(outputs)) +{ +} + +void SplitV::configure() +{ + assert(axis()->shape().num_elements() == 1); + _axis_value = getTensorData<int32_t>(axis())[0]; + if (_axis_value < 0) + _axis_value += input()->shape().num_dims(); + assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims()); + + auto num_split = static_cast<int32_t>(_outputs.size()); + auto sizes_data = getTensorData<int32_t>(size_splits()); + + assert(size_splits()->shape().num_dims() == 1); + assert(size_splits()->shape().num_elements() == num_split); + assert(std::accumulate(sizes_data, sizes_data + num_split, 0) == + input()->shape().dim(_axis_value)); + + auto output_shape = input()->shape(); + for (int32_t i = 0; i < num_split; ++i) + { + output_shape.dim(_axis_value) = sizes_data[i]; + _outputs[i]->resize(output_shape); + } +} + +void SplitV::execute() const +{ + tflite::SplitParams params{}; + params.num_split = _outputs.size(); + params.axis = _axis_value; + +#define TF_LITE_SPLIT(scalar) \ + { \ + VectorOfTensors<scalar, false> all_outputs(_outputs); \ + tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \ + all_outputs.shapes(), all_outputs.data()); \ + } + + switch (input()->element_type()) + { 
+ case DataType::FLOAT32: + TF_LITE_SPLIT(float); + break; + case DataType::U8: + TF_LITE_SPLIT(uint8_t); + break; + case DataType::S16: + TF_LITE_SPLIT(int16_t); + break; + default: + throw std::runtime_error("Unsupported type."); + } +#undef TF_LITE_SPLIT +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-interpreter/src/kernels/SplitV.h new file mode 100644 index 000000000..92f6288fb --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SplitV.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H +#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class SplitV : public Kernel +{ +public: + SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis, + std::vector<Tensor *> outputs); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size_splits() const { return _inputs[1]; } + const Tensor *axis() const { return _inputs[2]; } + Tensor *output(int index) const { return _outputs[index]; } + + void configure() override; + void execute() const override; + +private: + int32_t _axis_value{}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp new file mode 100644 index 000000000..aac0567d7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/SplitV.h" +#include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(int axis, std::initializer_list<int32_t> splits_size, + std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data, + std::vector<std::vector<T>> output_data) +{ + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); + + auto num_splits = static_cast<int32_t>(splits_size.size()); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor sizes_tensor = + makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get()); + Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get()); + + std::vector<Tensor> output_tensors; + output_tensors.reserve(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensors.emplace_back(makeOutputTensor(element_type)); + } + + std::vector<Tensor *> output_tensor_ptrs(num_splits); + for (int i = 0; i < num_splits; ++i) + { + output_tensor_ptrs[i] = &output_tensors[i]; + } + + SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs)); + kernel.configure(); + for (int i = 0; i < num_splits; ++i) + { + memory_manager->allocate_memory(output_tensors[i]); + } + kernel.execute(); + + for (int i = 0; i < num_splits; ++i) + { + auto tmp = extractTensorData<T>(output_tensors[i]); + EXPECT_THAT(extractTensorData<T>(output_tensors[i]), + ::testing::ElementsAreArray(output_data[i])); + } +} + +template <typename T> class SplitVTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t, int16_t>; +TYPED_TEST_CASE(SplitVTest, DataTypes); + +TYPED_TEST(SplitVTest, ThreeDimensional) +{ + Check<TypeParam>( + /*axis=*/0, 
/*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 4, 5, 6, 7, 8, 9}, // + {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} // + }); + Check<TypeParam>( + /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 2, 3, 10, 11, 12, 19, 20, 21}, // + {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} // + }); + Check<TypeParam>( + /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + { + {1, 4, 7, 10, 13, 16, 19, 22, 25}, // + {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} // + }); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp index e40a91e97..96835fbfc 100644 --- a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Sqrt.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,11 +30,15 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Sqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + 
memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); @@ -58,20 +63,25 @@ TEST(SqrtTest, SimpleSqrt) TEST(SqrtTest, Input_Output_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S32); Sqrt kernel(&input_tensor, &output_tensor); EXPECT_ANY_THROW(kernel.configure()); } -TEST(AddTest, Invalid_Input_Type_NEG) +TEST(SqrtTest, Invalid_Input_Type_NEG) { - Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); Sqrt kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-interpreter/src/kernels/Square.test.cpp index 730d6405c..51662dea7 100644 --- a/compiler/luci-interpreter/src/kernels/Square.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Square.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Square.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,13 +30,17 @@ using namespace testing; TEST(SquareTest, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{3, 1, 2}; std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1); + Tensor input_tensor = + 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Square kernel(&input_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736}; diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp index a72eaadfa..2819c01e2 100644 --- a/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp @@ -17,6 +17,7 @@ #include "kernels/SquaredDifference.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -29,15 +30,20 @@ using namespace testing; TEST(SquaredDifferenceTest, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{3, 1, 2}; std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; - Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1); - Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2); + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001}; @@ -46,16 +52,21 @@ TEST(SquaredDifferenceTest, Float) TEST(SquaredDifferenceTest, FloatBroadcast) { + std::unique_ptr<IMemoryManager> 
memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape1{3, 1, 2}; Shape input_shape2{1}; std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; std::vector<float> input_data2{1.0}; - Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1); - Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2); + Tensor input_tensor1 = + makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get()); + Tensor input_tensor2 = + makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536}; diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp index 1c81893b9..d3326fe98 100644 --- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Squeeze.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,8 +32,11 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<T> input_data, std::initializer_list<T> output_data, std::initializer_list<int32_t> squeeze_dims) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); SqueezeParams params{}; @@ -40,6 +44,7 @@ void 
Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int Squeeze kernel(&input_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp index 37b0dd8c5..c6452cdb0 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/strided_slice.h> #include <stdexcept> diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp index 66dffcaf2..399cdebed 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp @@ -16,6 +16,7 @@ #include "kernels/StridedSlice.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -28,6 +29,8 @@ using namespace testing; TEST(StridedSliceTest, Float) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{2, 3, 2}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape begin_shape{3}; @@ -36,10 +39,13 @@ TEST(StridedSliceTest, Float) std::vector<int32_t> end_data{1, 3, 2}; Shape strides_shape{3}; std::vector<int32_t> strides_data{1, 1, 1}; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data); - Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data); - Tensor strides_tensor = 
makeInputTensor<DataType::S32>(strides_shape, strides_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); StridedSliceParams params{}; @@ -52,6 +58,7 @@ TEST(StridedSliceTest, Float) StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<int32_t> output_shape{3, 2}; @@ -62,6 +69,8 @@ TEST(StridedSliceTest, Float) TEST(StridedSliceTest, Uint8) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Shape input_shape{2, 3, 2}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; Shape begin_shape{3}; @@ -70,10 +79,13 @@ TEST(StridedSliceTest, Uint8) std::vector<int32_t> end_data{1, 3, 2}; Shape strides_shape{3}; std::vector<int32_t> strides_data{1, 1, 1}; - Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data); - Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data); - Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data); - Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get()); + Tensor begin_tensor = + makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get()); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get()); + Tensor strides_tensor = + 
makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0); StridedSliceParams params{}; @@ -86,6 +98,7 @@ TEST(StridedSliceTest, Uint8) StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<int32_t> output_shape{3, 2}; diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp index 3c7588d62..603c62d0f 100644 --- a/compiler/luci-interpreter/src/kernels/Sub.cpp +++ b/compiler/luci-interpreter/src/kernels/Sub.cpp @@ -18,7 +18,9 @@ #include "kernels/Sub.h" #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> +#include "PALSub.h" + +#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h> #include <stdexcept> @@ -74,9 +76,9 @@ void Sub::evalFloat() const } else { - tflite::optimized_ops::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()), - getTensorShape(input2()), getTensorData<float>(input2()), - getTensorShape(output()), getTensorData<float>(output())); + luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); } } diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp index f560ceb36..c189f4481 100644 --- a/compiler/luci-interpreter/src/kernels/Sub.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Sub.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" #include <algorithm> @@ -33,6 +34,14 @@ using std::vector; using std::transform; using std::initializer_list; +class SubTest : public 
::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + // for quantized Add, the error shouldn't exceed step float GetTolerance(float min, float max) { @@ -40,7 +49,7 @@ float GetTolerance(float min, float max) return kQuantizedStep; } -TEST(SubTest, Uint8) +TEST_F(SubTest, Uint8) { Shape base_shape = {2, 3, 1, 2}; vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, @@ -62,10 +71,10 @@ TEST(SubTest, Uint8) pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); for (size_t i = 0; i < output_data.size(); ++i) { - Tensor input1_tensor = - makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); - Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, - quant_param.second, test_data); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -74,6 +83,7 @@ TEST(SubTest, Uint8) Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -90,10 +100,10 @@ TEST(SubTest, Uint8) // Re-run with exchanged inputs. 
for (size_t i = 0; i < output_data.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, - quant_param.second, test_data); - Tensor input2_tensor = - makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); + Tensor input1_tensor = makeInputTensor<DataType::U8>( + test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::U8>( + base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -102,6 +112,7 @@ TEST(SubTest, Uint8) Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(dequantizeTensorData(output_tensor), @@ -110,7 +121,7 @@ TEST(SubTest, Uint8) } } -TEST(SubTest, Float) +TEST_F(SubTest, Float) { Shape base_shape = {2, 3, 1, 2}; vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; @@ -130,8 +141,10 @@ TEST(SubTest, Float) vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; for (size_t i = 0; i < test_shapes.size(); ++i) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); - Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); + Tensor input1_tensor = + makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get()); + Tensor input2_tensor = + makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); SubParams params{}; @@ -139,6 +152,7 @@ TEST(SubTest, Float) Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); 
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) @@ -148,10 +162,10 @@ TEST(SubTest, Float) } } -TEST(SubTest, Input_Output_Type_NEG) +TEST_F(SubTest, Input_Output_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); - Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}); + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); SubParams params{}; @@ -161,10 +175,10 @@ TEST(SubTest, Input_Output_Type_NEG) EXPECT_ANY_THROW(kernel.configure()); } -TEST(SubTest, Invalid_Input_Type_NEG) +TEST_F(SubTest, Invalid_Input_Type_NEG) { - Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); - Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}); + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get()); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S64); SubParams params{}; @@ -172,6 +186,7 @@ TEST(SubTest, Invalid_Input_Type_NEG) Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp index 1c3d1281d..c4fa16912 100644 --- a/compiler/luci-interpreter/src/kernels/Tanh.cpp +++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/tanh.h> namespace luci_interpreter { diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp 
b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp index ef727d6eb..bfae479a9 100644 --- a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Tanh.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -27,18 +28,28 @@ namespace using namespace testing; -TEST(TanhTest, Float) +class TanhTest : public ::testing::Test +{ +protected: + void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; +}; + +TEST_F(TanhTest, Float) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ 0, -6, 2, 4, // 3, -2, 10, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Tanh kernel(&input_tensor, &output_tensor); kernel.configure(); + _memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -48,7 +59,7 @@ TEST(TanhTest, Float) EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); } -TEST(TanhTest, Uint8) +TEST_F(TanhTest, Uint8) { float kMin = -1; float kMax = 127.f / 128.f; @@ -69,13 +80,15 @@ TEST(TanhTest, Uint8) 0, -6, 2, 4, // -4, -2, 8, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, - input_quant_param.second, input_data); + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second, + input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); Tanh kernel(&input_tensor, &output_tensor); kernel.configure(); + 
_memory_manager->allocate_memory(output_tensor); kernel.execute(); std::vector<float> ref_output_data{ @@ -97,7 +110,7 @@ TEST(TanhTest, Uint8) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } -TEST(TanhTest, InputTypeInvalid_NEG) +TEST_F(TanhTest, InputTypeInvalid_NEG) { std::vector<int64_t> input_data{ 0, -6, 2, 4, // @@ -113,14 +126,16 @@ TEST(TanhTest, InputTypeInvalid_NEG) 0, -6, 2, 4, // -4, -2, 8, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data); + Tensor input_tensor = + makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Tanh kernel(&input_tensor, &output_tensor); + _memory_manager->allocate_memory(output_tensor); EXPECT_ANY_THROW(kernel.execute()); } -TEST(TanhTest, InputOutputMismatch_NEG) +TEST_F(TanhTest, InputOutputMismatch_NEG) { std::vector<float> input_data{ 0, -6, 2, 4, // @@ -136,7 +151,8 @@ TEST(TanhTest, InputOutputMismatch_NEG) 0, -6, 2, 4, // -4, -2, 8, 1, // }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data); + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8); Tanh kernel(&input_tensor, &output_tensor); diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-interpreter/src/kernels/TestUtils.cpp index 831dc4247..4d983adda 100644 --- a/compiler/luci-interpreter/src/kernels/TestUtils.cpp +++ b/compiler/luci-interpreter/src/kernels/TestUtils.cpp @@ -43,6 +43,11 @@ std::vector<float> dequantizeTensorData(const Tensor &tensor) std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor); return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); } + if (tensor.element_type() == DataType::S8) + { + std::vector<int8_t> data = extractTensorData<int8_t>(tensor); + return 
dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point()); + } else if (tensor.element_type() == DataType::S16) { // S16 quantization is symmetric, so zero point should be zero. diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-interpreter/src/kernels/TestUtils.h index c4c73d546..1f5a0c308 100644 --- a/compiler/luci-interpreter/src/kernels/TestUtils.h +++ b/compiler/luci-interpreter/src/kernels/TestUtils.h @@ -19,6 +19,7 @@ #define LUCI_INTERPRETER_KERNELS_TESTUTILS_H #include "luci_interpreter/core/Tensor.h" +#include "luci_interpreter/MemoryManager.h" #include <type_traits> @@ -36,9 +37,11 @@ template <typename T> std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point); template <DataType DT> -Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data) +Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data, + IMemoryManager *memory_manager) { Tensor tensor(DT, shape, {}, ""); + memory_manager->allocate_memory(tensor); tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type)); return tensor; } @@ -50,16 +53,18 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeIm * @param scale scale of quantized number * @param zero_point zero point of quantized number, should be 0 for signed datatypes * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor * @return created tensor */ template <DataType DT> Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point, - const std::vector<float> &data) + const std::vector<float> &data, IMemoryManager *memory_manager) { using NativeT = typename DataTypeImpl<DT>::Type; Tensor tensor(DT, shape, {{scale}, {zero_point}}, ""); std::vector<NativeT> quantized_data = quantize<NativeT>(data.data(), data.size(), scale, zero_point); + 
memory_manager->allocate_memory(tensor); tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); return tensor; } @@ -72,12 +77,13 @@ Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point, * @param zero_points zero points of quantized number, should be 0 for signed datatypes * @param quantize_dimension dimension to apply quantization along. Usually channels/output channels * @param data floating point data for quantization + * @param memory_manager memory manager for allocating memory to tensor * @return created tensor */ template <DataType DT> Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales, const std::vector<int32_t> &zero_points, int quantized_dimension, - const std::vector<float> &data) + const std::vector<float> &data, IMemoryManager *memory_manager) { using NativeT = typename DataTypeImpl<DT>::Type; assert(quantized_dimension < shape.num_dims()); @@ -113,6 +119,7 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales, part_quantized_data.end()); } assert(quantized_data.size() == shape.num_elements()); + memory_manager->allocate_memory(tensor); tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); return tensor; } @@ -127,12 +134,26 @@ template <typename T> constexpr DataType getElementType() { if (std::is_same<T, float>::value) return DataType::FLOAT32; + if (std::is_same<T, double>::value) + return DataType::FLOAT64; if (std::is_same<T, uint8_t>::value) return DataType::U8; + if (std::is_same<T, uint16_t>::value) + return DataType::U16; + if (std::is_same<T, uint32_t>::value) + return DataType::U32; + if (std::is_same<T, uint64_t>::value) + return DataType::U64; + if (std::is_same<T, int8_t>::value) + return DataType::S8; + if (std::is_same<T, int16_t>::value) + return DataType::S16; if (std::is_same<T, int32_t>::value) return DataType::S32; if (std::is_same<T, int64_t>::value) return DataType::S64; + if (std::is_same<T, 
bool>::value) + return DataType::BOOL; return DataType::Unknown; } @@ -156,8 +177,6 @@ std::vector<T> quantize(const float *data, size_t num_elements, float scale, int float q_min{}, q_max{}; if (std::is_signed<T>::value) { - // For now, assume that signed type implies signed symmetric quantization. - assert(zero_point == 0); q_min = -std::numeric_limits<T>::max(); q_max = std::numeric_limits<T>::max(); } diff --git a/compiler/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-interpreter/src/kernels/Transpose.cpp index c1a11cdb0..802d87295 100644 --- a/compiler/luci-interpreter/src/kernels/Transpose.cpp +++ b/compiler/luci-interpreter/src/kernels/Transpose.cpp @@ -18,7 +18,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/transpose.h> #include <stdexcept> @@ -37,7 +37,7 @@ void Transpose::configure() { // Transpose op only supports 1D-4D input arrays. int dims = input()->shape().num_dims(); - const int *perm_data = getTensorData<int32_t>(perm()); + const int32_t *perm_data = getTensorData<int32_t>(perm()); assert(input()->shape().num_dims() <= 4); assert(input()->element_type() == output()->element_type()); @@ -58,8 +58,8 @@ void Transpose::configure() void Transpose::execute() const { tflite::TransposeParams params{}; - const int *perm_data = getTensorData<int32_t>(perm()); - const int size = perm()->shape().dim(0); + const int32_t *perm_data = getTensorData<int32_t>(perm()); + const int32_t size = perm()->shape().dim(0); params.perm_count = size; for (int i = 0; i < size; i++) params.perm[i] = perm_data[i]; diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp index f0a915c35..107179910 100644 --- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp @@ -16,6 +16,7 @@ #include "kernels/Transpose.h" #include 
"kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -31,13 +32,16 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); - Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(element_type); Transpose kernel(&input_tensor, &perm_tensor, &output_tensor); kernel.configure(); + memory_manager->allocate_memory(output_tensor); kernel.execute(); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp index 0c70756b2..1b5f9d941 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp @@ -19,7 +19,7 @@ #include "kernels/Utils.h" -#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h> #include <stdexcept> @@ -30,8 +30,10 @@ namespace kernels { TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, - const Tensor *bias, Tensor *output, const TransposeConvParams &params) - : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params) + const Tensor *bias, Tensor 
*output, Tensor *scratch_tensor, + const TransposeConvParams &params) + : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, + {output, scratch_tensor}, params) { } @@ -74,15 +76,18 @@ void TransposeConv::configure() if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) { - DataType scratch_data_type = - input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32; - _scratch_tensor = - std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, ""); + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->resize(output()->shape()); const std::vector<double> real_multipliers = getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); _quant_multipliers = quantizeMultipliers(real_multipliers); } + else + { + auto scratch_tensor = getOutputTensors()[1]; + scratch_tensor->set_allocatable(false); + } } void TransposeConv::execute() const @@ -111,8 +116,6 @@ void TransposeConv::execute() const default: throw std::runtime_error("Unsupported type."); } - if (!!_scratch_tensor) - _scratch_tensor->deallocate(); } void TransposeConv::evalFloat() const @@ -148,13 +151,15 @@ void TransposeConv::evalQuantized() const op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min(); op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max(); + auto scratch_tensor = getOutputTensors()[1]; + tflite::reference_ops::TransposeConv(op_params, // getTensorShape(input()), getTensorData<uint8>(input()), // getTensorShape(filter()), getTensorData<uint8>(filter()), // getTensorShape(bias()), getTensorData<int32_t>(bias()), // getTensorShape(output()), getTensorData<uint8>(output()), // tflite::RuntimeShape(), nullptr, // - getTensorData<int32_t>(_scratch_tensor.get())); + getTensorData<int32_t>(scratch_tensor)); } void TransposeConv::evalQuantizedPerChannel() const @@ -163,7 +168,9 @@ void TransposeConv::evalQuantizedPerChannel() const 
const auto *filter_data = getTensorData<uint8_t>(filter()); const auto *bias_data = getTensorData<int32_t>(bias()); auto *output_data = getTensorData<uint8_t>(output()); - auto *scratch_data = getTensorData<int32_t>(_scratch_tensor.get()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData<int32_t>(scratch_tensor); const Shape &input_shape = input()->shape(); const Shape &filter_shape = filter()->shape(); @@ -186,7 +193,7 @@ void TransposeConv::evalQuantizedPerChannel() const int32_t activation_max{}; calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); - std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t)); + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t)); BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); for (int32_t batch = 0; batch < batches; ++batch) @@ -255,7 +262,9 @@ void TransposeConv::evalQuantizedS16() const const auto *filter_data = getTensorData<int16_t>(filter()); const auto *bias_data = getTensorData<int64_t>(bias()); auto *output_data = getTensorData<int16_t>(output()); - auto *scratch_data = getTensorData<int64_t>(_scratch_tensor.get()); + + auto scratch_tensor = getOutputTensors()[1]; + auto *scratch_data = getTensorData<int64_t>(scratch_tensor); const Shape &input_shape = input()->shape(); const Shape &filter_shape = filter()->shape(); @@ -278,7 +287,7 @@ void TransposeConv::evalQuantizedS16() const int32_t activation_max{}; calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max); - std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int64_t)); + std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t)); BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers); for (int32_t batch = 0; batch < batches; ++batch) diff --git 
a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h index 2e0beece8..cea0cf3c7 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.h +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h @@ -31,7 +31,8 @@ class TransposeConv : public KernelWithParams<TransposeConvParams> { public: TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, - const Tensor *bias, Tensor *output, const TransposeConvParams &params); + const Tensor *bias, Tensor *output, Tensor *scratch_tensor, + const TransposeConvParams &params); ~TransposeConv(); @@ -51,8 +52,6 @@ private: void evalQuantizedS16() const; private: - std::unique_ptr<Tensor> _scratch_tensor; - int32_t _padding_height{}; int32_t _padding_width{}; // The scaling factor from input to output (aka the 'real multiplier') can diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp index 9bcb015c1..4856e1b87 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -16,6 +16,7 @@ #include "kernels/TransposeConv.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -35,11 +36,18 @@ void Check(std::initializer_list<int32_t> output_shape_shape, std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height, int32_t stride_width) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + constexpr DataType element_type = getElementType<T>(); Tensor output_shape_tensor = - makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data); - Tensor weight_tensor = makeInputTensor<element_type>(weight_shape, weight_data); - Tensor input_data_tensor = makeInputTensor<element_type>(input_shape, input_data); + makeInputTensor<DataType::S32>(output_shape_shape, 
output_shape_data, memory_manager.get()); + Tensor weight_tensor = + makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get()); + Tensor input_data_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); + + DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); Tensor output_tensor = makeOutputTensor(element_type); TransposeConvParams params{}; @@ -49,17 +57,22 @@ void Check(std::initializer_list<int32_t> output_shape_shape, if (bias_data.size() != 0) { - Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data); + Tensor bias_tensor = + makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get()); TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); } else { TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); } EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); @@ -114,6 +127,8 @@ TEST(TransposeConvTest, SimpleBiasTest) TEST(TransposeConvTest, UInt8) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4}; std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; std::vector<float> bias_data{3, 4}; @@ -131,23 +146,30 @@ TEST(TransposeConvTest, UInt8) auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, 
zp = 96 auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data); - Tensor filter_tensor = makeInputTensor<DataType::U8>({2, 3, 3, 1}, filter_quant.first, - filter_quant.second, filter_data); - Tensor bias_tensor = - makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data); - Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>( + {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, + 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? 
DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + TransposeConvParams params{}; params.padding = Padding::VALID; params.stride_height = 2; params.stride_width = 2; TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); @@ -156,6 +178,8 @@ TEST(TransposeConvTest, UInt8) TEST(TransposeConvTest, UInt8_CWQ) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const int32_t output_channels = 2; std::vector<float> input_data{1, 2, 3, 4}; std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; @@ -190,23 +214,30 @@ TEST(TransposeConvTest, UInt8_CWQ) bias_scales.push_back(filter_quant_params[i].first * input_quant.first); std::vector<int32_t> zerop(output_channels, 0); - Tensor input_tensor = - makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data); - Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales, - filter_zerops, 0, filter_data); - Tensor bias_tensor = - makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data); - Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data); + Tensor input_tensor = makeInputTensor<DataType::U8>( + {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get()); + Tensor filter_tensor = makeInputTensor<DataType::U8>( + {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, + bias_data, 
memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + TransposeConvParams params{}; params.padding = Padding::VALID; params.stride_height = 2; params.stride_width = 2; TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); @@ -215,6 +246,8 @@ TEST(TransposeConvTest, UInt8_CWQ) TEST(TransposeConvTest, SInt16) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + std::vector<float> input_data{1, 2, 3, 4}; std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; std::vector<float> bias_data{3, 4}; @@ -227,20 +260,30 @@ TEST(TransposeConvTest, SInt16) 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // }; - Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data); - Tensor filter_tensor = makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data); - Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get()); + Tensor filter_tensor = + makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get()); + Tensor bias_tensor = + 
makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + TransposeConvParams params{}; params.padding = Padding::VALID; params.stride_height = 2; params.stride_width = 2; TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); @@ -249,6 +292,8 @@ TEST(TransposeConvTest, SInt16) TEST(TransposeConvTest, SInt16_CWQ_weights) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + const int output_channels = 2; const Shape input_shape{1, 2, 2, 1}; const Shape filter_shape{output_channels, 3, 3, 1}; @@ -273,21 +318,30 @@ TEST(TransposeConvTest, SInt16_CWQ_weights) std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale}; const std::vector<int32_t> zerop(2, 0); - Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data); - Tensor filter_tensor = - makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data); - Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data); - Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data); + Tensor input_tensor = + makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get()); + Tensor filter_tensor = 
makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, + filter_data, memory_manager.get()); + Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data, + memory_manager.get()); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get()); Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0); + DataType scratch_data_type = + input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + Tensor scratch_tensor(scratch_data_type, Shape({}), {}, ""); + TransposeConvParams params{}; params.padding = Padding::VALID; params.stride_height = 2; params.stride_width = 2; TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, - &output_tensor, params); + &output_tensor, &scratch_tensor, params); kernel.configure(); + memory_manager->allocate_memory(output_tensor); + memory_manager->allocate_memory(scratch_tensor); kernel.execute(); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp index 6d611e12e..4f22c9f30 100644 --- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp @@ -17,6 +17,7 @@ #include "kernels/Unpack.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -32,10 +33,12 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data, const std::vector<std::initializer_list<int32_t>> &exp_output_shape, std::vector<std::initializer_list<T>> exp_output_data) { + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); constexpr DataType element_type = getElementType<T>(); const int num_outputs = input_shape.dim(axis < 0 ? 
axis + input_shape.num_dims() : axis); - Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor input_tensor = + makeInputTensor<element_type>(input_shape, input_data, memory_manager.get()); std::vector<Tensor> output_tensors; output_tensors.reserve(num_outputs); for (int i = 0; i < num_outputs; ++i) @@ -54,6 +57,10 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data, Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params); kernel.configure(); + for (int i = 0; i < num_outputs; i++) + { + memory_manager->allocate_memory(output_tensors[i]); + } kernel.execute(); for (int i = 0; i < num_outputs; ++i) diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp index 83faa7d7f..6e83e37f6 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.cpp +++ b/compiler/luci-interpreter/src/kernels/Utils.cpp @@ -91,7 +91,7 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max) { - // For now, assume that signed type implies signed symmetric quantization. + assert(output->zero_points().size() == 1); int32_t qmin{}; int32_t qmax{}; switch (output->element_type()) @@ -101,11 +101,11 @@ void calculateActivationRangeQuantized(Activation activation, const Tensor *outp qmax = std::numeric_limits<uint8_t>::max(); break; case DataType::S8: - assert(output->zero_point() == 0); qmin = -std::numeric_limits<int8_t>::max(); qmax = std::numeric_limits<int8_t>::max(); break; case DataType::S16: + // For now, assume that signed int16 type implies signed symmetric quantization. 
assert(output->zero_point() == 0); qmin = -std::numeric_limits<int16_t>::max(); qmax = std::numeric_limits<int16_t>::max(); diff --git a/compiler/luci-interpreter/src/kernels/While.cpp b/compiler/luci-interpreter/src/kernels/While.cpp index d4676467d..153bd1a99 100644 --- a/compiler/luci-interpreter/src/kernels/While.cpp +++ b/compiler/luci-interpreter/src/kernels/While.cpp @@ -49,6 +49,13 @@ void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst) copy(const_src, dst); } +// TODO: Think about how allocate memory for output in main graph +void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph) +{ + for (auto tensor : tensors) + run_graph->configureAllocations(tensor); +} + } // namespace While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, @@ -78,11 +85,15 @@ void While::execute() const const auto &cond_inputs = _cond_graph->getInputTensors(); const auto &cond_outputs = _cond_graph->getOutputTensors(); + configureTensorsAllocations(cond_inputs, _cond_graph); + copy(getInputTensors(), cond_inputs); const auto &body_inputs = _body_graph->getInputTensors(); const auto &body_outputs = _body_graph->getOutputTensors(); + configureTensorsAllocations(body_inputs, _body_graph); + while (true) { _cond_graph->execute(); diff --git a/compiler/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-interpreter/src/kernels/While.test.cpp index a066d2c12..cb8f89130 100644 --- a/compiler/luci-interpreter/src/kernels/While.test.cpp +++ b/compiler/luci-interpreter/src/kernels/While.test.cpp @@ -20,6 +20,7 @@ #include "kernels/Less.h" #include "kernels/While.h" #include "kernels/TestUtils.h" +#include "luci_interpreter/TestMemoryManager.h" namespace luci_interpreter { @@ -30,14 +31,18 @@ namespace using namespace testing; -RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond) +RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor 
*input_cond, + IMemoryManager *memory_manager) { - RuntimeGraph *graph = module->addGraph(); + RuntimeGraph *graph = module->addGraph(memory_manager); Tensor *input = graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); Tensor *output = graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, "")); + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + graph->setInputTensors({input}); graph->setOutputTensors({output}); @@ -46,14 +51,18 @@ RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *i return graph; } -RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add) +RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add, + IMemoryManager *memory_manager) { - RuntimeGraph *graph = module->addGraph(); + RuntimeGraph *graph = module->addGraph(memory_manager); Tensor *input = graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); Tensor *output = graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, "")); + memory_manager->allocate_memory(*input); + memory_manager->allocate_memory(*output); + graph->setInputTensors({input}); graph->setOutputTensors({output}); @@ -66,18 +75,22 @@ RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *i TEST(WhileTest, FloatLoop10) { - Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}); + std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>(); + Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get()); Tensor output = makeOutputTensor(DataType::FLOAT32); - Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}); - Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}); + Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get()); + Tensor input_add = 
makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get()); RuntimeModule module(nullptr); - RuntimeGraph *cond_graph = buildCondSubgraph(&module, DataType::FLOAT32, &input_cond); - RuntimeGraph *body_graph = buildBodySubgraph(&module, DataType::FLOAT32, &input_add); + RuntimeGraph *cond_graph = + buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get()); + RuntimeGraph *body_graph = + buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get()); While kernel({&input}, {&output}, cond_graph, body_graph); kernel.configure(); + memory_manager->allocate_memory(output); kernel.execute(); EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10})); diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt index 782f46761..974283a2f 100644 --- a/compiler/luci-interpreter/src/loader/CMakeLists.txt +++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt @@ -7,14 +7,23 @@ set(SOURCES KernelBuilder.cpp ModuleLoader.h ModuleLoader.cpp - RuntimeToIR.h) + RuntimeToIR.h + nodes/Builders.h) -add_library(luci_interpreter_loader STATIC ${SOURCES}) -set_target_properties(luci_interpreter_loader PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(luci_interpreter_loader PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") -target_link_libraries(luci_interpreter_loader - PUBLIC luci_lang luci_interpreter_core - PRIVATE luci_interpreter_kernels nncc_common) +# include kernel specific builders +macro(REGISTER_KERNEL NODE) + list(APPEND SOURCES "nodes/${NODE}.cpp") +endmacro(REGISTER_KERNEL) +include(${KERNEL_REGISTER_FILE}) + +add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES}) +set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}") +target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}") + 
+target_link_libraries(${LUCI_INTERPRETER_LOADER} + PUBLIC luci_lang ${LUCI_INTERPRETER_CORE} + PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common) if(NOT ENABLE_TEST) return() @@ -24,5 +33,5 @@ nnas_find_package(GTest REQUIRED) set(TEST_SOURCES KernelBuilder.test.cpp) -GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES}) -target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader) +GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES}) +target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER}) diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index ee45ad747..b55e7c504 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -57,6 +57,8 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size) return getNodeDataImpl<DataType::U8>(node, data_size); case DataType::FLOAT32: return getNodeDataImpl<DataType::FLOAT32>(node, data_size); + case DataType::S8: + return getNodeDataImpl<DataType::S8>(node, data_size); case DataType::S16: return getNodeDataImpl<DataType::S16>(node, data_size); case DataType::S32: @@ -82,6 +84,7 @@ bool isExecutableNode(const luci::CircleNode *node) // The following nodes denote outputs of multiple-output nodes. 
case luci::CircleOpcode::CIRCLEIFOUT: case luci::CircleOpcode::CIRCLESPLITOUT: + case luci::CircleOpcode::CIRCLESPLITVOUT: case luci::CircleOpcode::CIRCLEUNPACKOUT: case luci::CircleOpcode::CIRCLEWHILEOUT: return false; @@ -112,9 +115,10 @@ bool isTensorProducingNode(const luci::CircleNode *node) GraphLoader::GraphLoader( const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager) : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir), - _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor) + _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor), + _memory_manager(memory_manager) { } @@ -156,7 +160,10 @@ void GraphLoader::loadTensors() size_t data_size{}; const void *const_data = getNodeData(const_node, &data_size); if (const_data != nullptr) + { + _memory_manager->allocate_memory(*tensor); tensor->writeData(const_data, data_size); + } } _node_to_tensor.emplace(node, tensor.get()); @@ -173,6 +180,7 @@ void GraphLoader::initInputOutputTensors() const for (size_t i = 0; i < input_nodes.size(); ++i) { input_tensors[i] = _node_to_tensor.at(input_nodes[i]); + _memory_manager->allocate_memory(*input_tensors[i]); } _runtime_graph->setInputTensors(input_tensors); diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-interpreter/src/loader/GraphLoader.h index 89c5bcad7..fe066ecf8 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.h +++ b/compiler/luci-interpreter/src/loader/GraphLoader.h @@ -19,6 +19,7 @@ #include "core/RuntimeGraph.h" #include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" #include <loco/IR/Graph.h> @@ -32,7 +33,8 @@ class GraphLoader public: 
GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir, const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager); void loadTensors(); void initInputOutputTensors() const; @@ -42,6 +44,7 @@ private: const loco::Graph *_graph; RuntimeGraph *_runtime_graph; RuntimeToIR &_runtime_to_ir; + IMemoryManager *_memory_manager; const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph; std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp index 4cb8bd691..8483a9a3d 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp @@ -15,1240 +15,90 @@ */ #include "loader/KernelBuilder.h" - -#include "kernels/Add.h" -#include "kernels/ArgMax.h" -#include "kernels/AveragePool2D.h" -#include "kernels/BatchToSpaceND.h" -#include "kernels/Cast.h" -#include "kernels/Concatenation.h" -#include "kernels/Conv2D.h" -#include "kernels/DepthToSpace.h" -#include "kernels/DepthwiseConv2D.h" -#include "kernels/Div.h" -#include "kernels/Elu.h" -#include "kernels/Exp.h" -#include "kernels/Floor.h" -#include "kernels/FloorDiv.h" -#include "kernels/Equal.h" -#include "kernels/FullyConnected.h" -#include "kernels/Greater.h" -#include "kernels/GreaterEqual.h" -#include "kernels/If.h" -#include "kernels/InstanceNorm.h" -#include "kernels/L2Normalize.h" -#include "kernels/L2Pool2D.h" -#include "kernels/LeakyRelu.h" -#include "kernels/Less.h" -#include "kernels/LessEqual.h" -#include "kernels/LocalResponseNormalization.h" -#include "kernels/LogicalAnd.h" -#include "kernels/LogicalNot.h" -#include "kernels/LogicalOr.h" -#include 
"kernels/Logistic.h" -#include "kernels/LogSoftmax.h" -#include "kernels/Maximum.h" -#include "kernels/MaxPool2D.h" -#include "kernels/Mean.h" -#include "kernels/Minimum.h" -#include "kernels/MirrorPad.h" -#include "kernels/Mul.h" -#include "kernels/Neg.h" -#include "kernels/NotEqual.h" -#include "kernels/Pack.h" -#include "kernels/Pad.h" -#include "kernels/PadV2.h" -#include "kernels/Pow.h" -#include "kernels/PRelu.h" -#include "kernels/Relu.h" -#include "kernels/Relu6.h" -#include "kernels/Reshape.h" -#include "kernels/ResizeBilinear.h" -#include "kernels/ResizeNearestNeighbor.h" -#include "kernels/ReverseV2.h" -#include "kernels/Rsqrt.h" -#include "kernels/Slice.h" -#include "kernels/Softmax.h" -#include "kernels/SpaceToBatchND.h" -#include "kernels/SpaceToDepth.h" -#include "kernels/Split.h" -#include "kernels/StridedSlice.h" -#include "kernels/Sqrt.h" -#include "kernels/Square.h" -#include "kernels/SquaredDifference.h" -#include "kernels/Squeeze.h" -#include "kernels/Sub.h" -#include "kernels/Tanh.h" -#include "kernels/Unpack.h" -#include "kernels/Transpose.h" -#include "kernels/TransposeConv.h" -#include "kernels/While.h" +#include "loader/nodes/Builders.h" #include <stdexcept> -namespace -{ - -template <typename CircleNodeOut> -std::vector<const loco::Node *> collectOutputNodes(const luci::CircleNode *node) -{ - std::vector<const CircleNodeOut *> output_nodes; - for (const loco::Node *loco_node : loco::succs(node)) - { - output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node)); - } - std::sort(output_nodes.begin(), output_nodes.end(), - [](const CircleNodeOut *node1, const CircleNodeOut *node2) { - return node1->index() < node2->index(); - }); - return {output_nodes.cbegin(), output_nodes.cend()}; -} - -} // namespace - namespace luci_interpreter { -// TODO move to anonymous namespace -enum class KB +#define CIRCLE_NODE(OPCODE, CLASS) CLASS, +#define CIRCLE_VNODE(OPCODE, CLASS) CLASS, + +// This enum is auxiliary. 
+// It is duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE, +// because list of target operators is in format of CLASS names +enum class BuilderId { - ABC, - DEF, - GHIJ, - KLMN, - OPQR, - STUV, - WXYZ, +#include <luci/IR/CircleNodes.lst> + Size // casts to count of values in BuilderId enum }; -#define DECLARE_VISIT(CLASS) std::unique_ptr<Kernel> visit(const luci::CLASS *) override +#undef CIRCLE_VNODE +#undef CIRCLE_NODE -template <KB kb> class KernelBuilderLet; +/** + * @brief Registry of kernel builders + * + * This class contains mapping from Opcodes to kernel builder functions + */ -template <> -class KernelBuilderLet<KB::ABC> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper +class KernelBuilderRegistry { public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) - { - } + using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *, + KernelBuilderHelper &); -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleAdd); - DECLARE_VISIT(CircleArgMax); - DECLARE_VISIT(CircleAveragePool2D); - DECLARE_VISIT(CircleBatchToSpaceND); - DECLARE_VISIT(CircleCast); - DECLARE_VISIT(CircleConcatenation); - DECLARE_VISIT(CircleConst); - DECLARE_VISIT(CircleConv2D); -}; - -template <> -class KernelBuilderLet<KB::DEF> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper -{ -public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) + KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr) { - 
} +#define REGISTER_KERNEL(name) \ + register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name); -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleDepthToSpace); - DECLARE_VISIT(CircleDepthwiseConv2D); - DECLARE_VISIT(CircleDiv); - DECLARE_VISIT(CircleElu); - DECLARE_VISIT(CircleEqual); - DECLARE_VISIT(CircleExp); - DECLARE_VISIT(CircleFloor); - DECLARE_VISIT(CircleFloorDiv); - DECLARE_VISIT(CircleFullyConnected); -}; +#include "KernelsToBuild.lst" -template <> -class KernelBuilderLet<KB::GHIJ> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper -{ -public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) - { +#undef REGISTER_KERNEL } -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleGreater); - DECLARE_VISIT(CircleGreaterEqual); - DECLARE_VISIT(CircleIf); - DECLARE_VISIT(CircleInput); - DECLARE_VISIT(CircleInstanceNorm); -}; - -template <> -class KernelBuilderLet<KB::KLMN> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper -{ -public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) + KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const { + return _operator_builders.at(size_t(opcode)); } -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleL2Normalize); - DECLARE_VISIT(CircleL2Pool2D); - DECLARE_VISIT(CircleLeakyRelu); - DECLARE_VISIT(CircleLess); - 
DECLARE_VISIT(CircleLessEqual); - DECLARE_VISIT(CircleLocalResponseNormalization); - DECLARE_VISIT(CircleLogSoftmax); - DECLARE_VISIT(CircleLogicalAnd); - DECLARE_VISIT(CircleLogicalNot); - DECLARE_VISIT(CircleLogicalOr); - DECLARE_VISIT(CircleLogistic); - DECLARE_VISIT(CircleMaxPool2D); - DECLARE_VISIT(CircleMaximum); - DECLARE_VISIT(CircleMean); - DECLARE_VISIT(CircleMinimum); - DECLARE_VISIT(CircleMirrorPad); - DECLARE_VISIT(CircleMul); - DECLARE_VISIT(CircleNeg); - DECLARE_VISIT(CircleNotEqual); -}; +private: + std::vector<KernelBuilderFunc *> _operator_builders; -template <> -class KernelBuilderLet<KB::OPQR> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper -{ -public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) + void register_kernel_builder(BuilderId id, KernelBuilderFunc *func) { + // Using BuilderId is a duplicate of luci::CirclreOpcode, + // size_t(id) is equal to size_t(corresponding operation opcode). 
+ assert(size_t(id) < _operator_builders.size()); + _operator_builders[size_t(id)] = func; } - -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleOutput); - DECLARE_VISIT(CirclePRelu); - DECLARE_VISIT(CirclePack); - DECLARE_VISIT(CirclePad); - DECLARE_VISIT(CirclePadV2); - DECLARE_VISIT(CirclePow); - DECLARE_VISIT(CircleRelu); - DECLARE_VISIT(CircleRelu6); - DECLARE_VISIT(CircleReshape); - DECLARE_VISIT(CircleResizeBilinear); - DECLARE_VISIT(CircleResizeNearestNeighbor); - DECLARE_VISIT(CircleReverseV2); - DECLARE_VISIT(CircleRsqrt); }; -template <> -class KernelBuilderLet<KB::STUV> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper +KernelBuilder::KernelBuilder( + const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) { -public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) - { - } - -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleSlice); - DECLARE_VISIT(CircleSoftmax); - DECLARE_VISIT(CircleSpaceToBatchND); - DECLARE_VISIT(CircleSpaceToDepth); - DECLARE_VISIT(CircleSplit); - DECLARE_VISIT(CircleSqrt); - DECLARE_VISIT(CircleSquare); - DECLARE_VISIT(CircleSquaredDifference); - DECLARE_VISIT(CircleSqueeze); - DECLARE_VISIT(CircleStridedSlice); - DECLARE_VISIT(CircleSub); - DECLARE_VISIT(CircleTanh); - DECLARE_VISIT(CircleTranspose); - DECLARE_VISIT(CircleTransposeConv); - DECLARE_VISIT(CircleUnpack); -}; + _builder_registry = std::make_unique<KernelBuilderRegistry>(); +} -template <> -class KernelBuilderLet<KB::WXYZ> : public 
luci::CircleNodeVisitor<std::unique_ptr<Kernel>>, - public KernelBuilderHelper +KernelBuilder::~KernelBuilder() { -public: - KernelBuilderLet( - const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) - { - } - -public: - std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; } - -public: - DECLARE_VISIT(CircleWhile); -}; - -#undef DECLARE_VISIT + // Need to define in this CPP to hide KernelBuilderRegistry internals. + // This destructor deletes _builder_registry +} std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node) { -#define VISIT_KB(GRP) \ - do \ - { \ - KernelBuilderLet<KB::GRP> kbl(graph_to_runtime_graph(), node_to_tensor()); \ - auto ret = node->accept(&kbl); \ - if (ret != nullptr) \ - return ret; \ - } while (false) - - VISIT_KB(ABC); - VISIT_KB(DEF); - VISIT_KB(GHIJ); - VISIT_KB(KLMN); - VISIT_KB(OPQR); - VISIT_KB(STUV); - VISIT_KB(WXYZ); + auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode()); + if (specific_builder != nullptr) + return specific_builder(node, *this); -#undef VISIT_KB std::string msg = "Unsupported operator: "; msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name()); throw std::invalid_argument(msg.c_str()); } -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleAdd *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - AddParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Add>(input1, input2, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleArgMax *node) -{ - assert(node->arity() == 2); - const Tensor 
*input = getInputTensor(node->input()); - const Tensor *axis = getInputTensor(node->dimension()); - Tensor *output = getOutputTensor(node); - - ArgMaxParams params{}; - params.output_type = node->output_type(); - - return std::make_unique<kernels::ArgMax>(input, axis, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleAveragePool2D *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->value()); - Tensor *output = getOutputTensor(node); - - Pool2DParams params{}; - params.padding = node->padding(); - params.filter_height = node->filter()->h(); - params.filter_width = node->filter()->w(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::AveragePool2D>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleBatchToSpaceND *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *block_shape = getInputTensor(node->block_shape()); - const Tensor *crops = getInputTensor(node->crops()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleCast *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Cast>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConcatenation *node) -{ - std::vector<const Tensor *> inputs(node->numValues()); - for (uint32_t i = 0; i < node->numValues(); ++i) - { - inputs[i] = getInputTensor(node->values(i)); - } - Tensor *output = getOutputTensor(node); - - ConcatenationParams params{}; - params.axis = node->axis(); - 
params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConst *) -{ - throw std::runtime_error("Const node cannot be executed."); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConv2D *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *filter = getInputTensor(node->filter()); - const Tensor *bias = getInputTensor(node->bias()); - Tensor *output = getOutputTensor(node); - - Conv2DParams params{}; - params.padding = node->padding(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.dilation_height_factor = node->dilation()->h(); - params.dilation_width_factor = node->dilation()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDepthToSpace *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->input()); - Tensor *output = getOutputTensor(node); - - DepthToSpaceParams params{}; - params.block_size = node->block_size(); - - return std::make_unique<kernels::DepthToSpace>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDepthwiseConv2D *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *filter = getInputTensor(node->filter()); - const Tensor *bias = getInputTensor(node->bias()); - Tensor *output = getOutputTensor(node); - - DepthwiseConv2DParams params{}; - params.padding = node->padding(); - params.depth_multiplier = node->depthMultiplier(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - 
params.dilation_height_factor = node->dilation()->h(); - params.dilation_width_factor = node->dilation()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDiv *node) -{ - assert(node->arity() == 2); - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - DivParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Div>(input1, input2, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleElu *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->features()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Elu>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleEqual *node) -{ - assert(node->arity() == 2); - - const Tensor *x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Equal>(x, y, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleExp *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Exp>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFloor *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Floor>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFloorDiv *node) -{ - assert(node->arity() == 2); - - const Tensor 
*x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::FloorDiv>(x, y, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFullyConnected *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *weights = getInputTensor(node->weights()); - const Tensor *bias = getOptionalInputTensor(node->bias()); - Tensor *output = getOutputTensor(node); - - FullyConnectedParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleGreater *node) -{ - assert(node->arity() == 2); - - const Tensor *x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Greater>(x, y, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleGreaterEqual *node) -{ - assert(node->arity() == 2); - - const Tensor *x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::GreaterEqual>(x, y, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleIf *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node); - assert(node->arity() == 1 + node->input_count()); - assert(output_nodes.size() == static_cast<size_t>(node->output_count())); - - const Tensor *cond = getInputTensor(node->cond()); - std::vector<const Tensor *> inputs(node->input_count()); - for (uint32_t i = 0; i < node->input_count(); ++i) - { - inputs[i] = getInputTensor(node->input(i)); - } - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - RuntimeGraph *then_graph = 
getRuntimeGraph(node->then_graph()); - RuntimeGraph *else_graph = getRuntimeGraph(node->else_graph()); - - return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph, - else_graph); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleInstanceNorm *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *gamma = getInputTensor(node->gamma()); - const Tensor *beta = getInputTensor(node->beta()); - - Tensor *output = getOutputTensor(node); - - InstanceNormParams params{}; - params.epsilon = node->epsilon(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleInput *) -{ - throw std::runtime_error("Input node cannot be executed."); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleL2Normalize *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - L2NormParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::L2Normalize>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleL2Pool2D *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->value()); - Tensor *output = getOutputTensor(node); - - Pool2DParams params{}; - params.padding = node->padding(); - params.filter_height = node->filter()->h(); - params.filter_width = node->filter()->w(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::L2Pool2D>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const 
luci::CircleLeakyRelu *node) -{ - assert(node->arity() == 1); - const Tensor *input = getInputTensor(node->features()); - Tensor *output = getOutputTensor(node); - - LeakyReluParams params{}; - params.alpha = node->alpha(); - - return std::make_unique<kernels::LeakyRelu>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLess *node) -{ - assert(node->arity() == 2); - - const Tensor *x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Less>(x, y, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLessEqual *node) -{ - assert(node->arity() == 2); - - const Tensor *x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::LessEqual>(x, y, output); -} - -std::unique_ptr<Kernel> -KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLocalResponseNormalization *node) -{ - assert(node->arity() == 1); - const Tensor *input = getInputTensor(node->input()); - Tensor *output = getOutputTensor(node); - - LocalResponseNormalizationParams params{}; - params.radius = node->radius(); - params.bias = node->bias(); - params.alpha = node->alpha(); - params.beta = node->beta(); - - return std::make_unique<kernels::LocalResponseNormalization>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalAnd *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::LogicalAnd>(input1, input2, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalNot *node) -{ - assert(node->arity() == 1); - - const Tensor *input = 
getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::LogicalNot>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalOr *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::LogicalOr>(input1, input2, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogistic *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Logistic>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogSoftmax *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->logits()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::LogSoftmax>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMaximum *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Maximum>(input1, input2, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMaxPool2D *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->value()); - Tensor *output = getOutputTensor(node); - - Pool2DParams params{}; - params.padding = node->padding(); - params.filter_height = node->filter()->h(); - params.filter_width = node->filter()->w(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - params.activation = node->fusedActivationFunction(); - - return 
std::make_unique<kernels::MaxPool2D>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMean *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *axes = getInputTensor(node->reduction_indices()); - Tensor *output = getOutputTensor(node); - - ReducerParams params{}; - params.keep_dims = node->keep_dims(); - - return std::make_unique<kernels::Mean>(input, axes, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMinimum *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Minimum>(input1, input2, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMirrorPad *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *paddings = getInputTensor(node->paddings()); - Tensor *output = getOutputTensor(node); - - MirrorPadParams params{}; - params.mode = node->mode(); - - return std::make_unique<kernels::MirrorPad>(input, paddings, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMul *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - MulParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Mul>(input1, input2, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleNeg *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Neg>(input, output); -} - 
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleNotEqual *node) -{ - assert(node->arity() == 2); - - const Tensor *x = getInputTensor(node->x()); - const Tensor *y = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::NotEqual>(x, y, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleOutput *) -{ - throw std::runtime_error("Output node cannot be executed."); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePack *node) -{ - assert(node->arity() == node->values_count()); - - std::vector<const Tensor *> inputs(node->values_count()); - for (uint32_t i = 0; i < node->values_count(); ++i) - { - inputs[i] = getInputTensor(node->values(i)); - } - Tensor *output = getOutputTensor(node); - - PackParams params{}; - params.axis = node->axis(); - params.values_count = node->values_count(); - - return std::make_unique<kernels::Pack>(std::move(inputs), output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePad *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *paddings = getInputTensor(node->paddings()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Pad>(input, paddings, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePadV2 *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *paddings = getInputTensor(node->paddings()); - const Tensor *constant_values = getInputTensor(node->constant_values()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePow *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = 
getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Pow>(input1, input2, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePRelu *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *alpha = getInputTensor(node->alpha()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::PRelu>(input, alpha, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRelu *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->features()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Relu>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRelu6 *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->features()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Relu6>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleReshape *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->tensor()); - const Tensor *shape = getInputTensor(node->shape()); - Tensor *output = getOutputTensor(node); - - // NOTE 'newShape' attribute is ignored. 
- return std::make_unique<kernels::Reshape>(input, shape, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleResizeBilinear *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *size = getInputTensor(node->size()); - Tensor *output = getOutputTensor(node); - - ResizeBilinearParams params{}; - params.align_corners = node->align_corners(); - params.half_pixel_centers = node->half_pixel_centers(); - - return std::make_unique<kernels::ResizeBilinear>(input, size, output, params); -} - -std::unique_ptr<Kernel> -KernelBuilderLet<KB::OPQR>::visit(const luci::CircleResizeNearestNeighbor *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *size = getInputTensor(node->size()); - Tensor *output = getOutputTensor(node); - - ResizeNearestNeighborParams params{}; - params.align_corners = node->align_corners(); - // TODO update half_pixel_centers after CircleResizeNearestNeighbor updated - // Current CircleResizeNearestNeighbor don't have half_pixel_centers. - // default value on current is false. - // it need to be updated when CircleResizeNearestNeighbor updated. 
- params.half_pixel_centers = false; - - return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleReverseV2 *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->tensor()); - const Tensor *axes = getInputTensor(node->axis()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::ReverseV2>(input, axes, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRsqrt *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Rsqrt>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSlice *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *begin = getInputTensor(node->begin()); - const Tensor *size = getInputTensor(node->size()); - - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Slice>(input, begin, size, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSoftmax *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->logits()); - Tensor *output = getOutputTensor(node); - - SoftmaxParams params{}; - params.beta = node->beta(); - - return std::make_unique<kernels::Softmax>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSpaceToBatchND *node) -{ - assert(node->arity() == 3); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *block_shape = getInputTensor(node->block_shape()); - const Tensor *paddings = getInputTensor(node->paddings()); - - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output); - ; -} - 
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSpaceToDepth *node) -{ - assert(node->arity() == 1); - const Tensor *input = getInputTensor(node->input()); - - Tensor *output = getOutputTensor(node); - - SpaceToDepthParams params{}; - params.block_size = node->block_size(); - - return std::make_unique<kernels::SpaceToDepth>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSplit *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node); - assert(node->arity() == 2); - assert(output_nodes.size() == static_cast<size_t>(node->num_split())); - - const Tensor *axis = getInputTensor(node->split_dim()); - const Tensor *input = getInputTensor(node->input()); - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - // NOTE 'num_splits' attribute is ignored. - return std::make_unique<kernels::Split>(axis, input, std::move(outputs)); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSqrt *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Sqrt>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSquare *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Square>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSquaredDifference *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::SquaredDifference>(input1, input2, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSqueeze *node) -{ - 
assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->input()); - Tensor *output = getOutputTensor(node); - - SqueezeParams params{}; - params.squeeze_dims = node->squeeze_dims(); - - return std::make_unique<kernels::Squeeze>(input, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleStridedSlice *node) -{ - assert(node->arity() == 4); - - const Tensor *input = getInputTensor(node->input()); - const Tensor *begin = getInputTensor(node->begin()); - const Tensor *end = getInputTensor(node->end()); - const Tensor *strides = getInputTensor(node->strides()); - - Tensor *output = getOutputTensor(node); - - StridedSliceParams params{}; - params.begin_mask = node->begin_mask(); - params.ellipsis_mask = node->ellipsis_mask(); - params.end_mask = node->end_mask(); - params.new_axis_mask = node->new_axis_mask(); - params.shrink_axis_mask = node->shrink_axis_mask(); - - return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSub *node) -{ - assert(node->arity() == 2); - - const Tensor *input1 = getInputTensor(node->x()); - const Tensor *input2 = getInputTensor(node->y()); - Tensor *output = getOutputTensor(node); - - SubParams params{}; - params.activation = node->fusedActivationFunction(); - - return std::make_unique<kernels::Sub>(input1, input2, output, params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTanh *node) -{ - assert(node->arity() == 1); - - const Tensor *input = getInputTensor(node->x()); - Tensor *output = getOutputTensor(node); - - return std::make_unique<kernels::Tanh>(input, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTranspose *node) -{ - assert(node->arity() == 2); - - const Tensor *input = getInputTensor(node->a()); - const Tensor *perm = getInputTensor(node->perm()); - Tensor *output = 
getOutputTensor(node); - - return std::make_unique<kernels::Transpose>(input, perm, output); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTransposeConv *node) -{ - assert(node->arity() == 4); - - const Tensor *input_sizes = getInputTensor(node->inputSizes()); - const Tensor *filter = getInputTensor(node->filter()); - const Tensor *out_backprop = getInputTensor(node->outBackprop()); - const Tensor *bias = getOptionalInputTensor(node->bias()); - - Tensor *output = getOutputTensor(node); - - TransposeConvParams params{}; - params.padding = node->padding(); - params.stride_height = node->stride()->h(); - params.stride_width = node->stride()->w(); - - return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output, - params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleUnpack *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node); - assert(node->arity() == 1); - assert(output_nodes.size() == static_cast<size_t>(node->num())); - - const Tensor *input = getInputTensor(node->value()); - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - UnpackParams params{}; - params.axis = node->axis(); - - // NOTE 'num' attribute is ignored. 
- return std::make_unique<kernels::Unpack>(input, std::move(outputs), params); -} - -std::unique_ptr<Kernel> KernelBuilderLet<KB::WXYZ>::visit(const luci::CircleWhile *node) -{ - auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node); - assert(node->arity() == node->input_count()); - assert(output_nodes.size() == static_cast<size_t>(node->output_count())); - - std::vector<const Tensor *> inputs(node->input_count()); - for (uint32_t i = 0; i < node->input_count(); ++i) - { - inputs[i] = getInputTensor(node->input(i)); - } - std::vector<Tensor *> outputs = getOutputTensors(output_nodes); - - RuntimeGraph *cond_graph = getRuntimeGraph(node->cond_graph()); - RuntimeGraph *body_graph = getRuntimeGraph(node->body_graph()); - - return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph, - body_graph); -} - } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h index 406c41ef6..b1f383394 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilder.h +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h @@ -30,17 +30,21 @@ namespace luci_interpreter { +class KernelBuilderRegistry; + class KernelBuilder : public KernelBuilderHelper { public: KernelBuilder( const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph, - const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) - : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor) - { - } + const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); + + ~KernelBuilder(); std::unique_ptr<Kernel> build(const luci::CircleNode *node); + +private: + std::unique_ptr<KernelBuilderRegistry> _builder_registry; }; } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp index d8611243e..7a457a62f 100644 --- 
a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp +++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp @@ -16,6 +16,7 @@ #include "loader/GraphLoader.h" #include "loader/KernelBuilder.h" +#include "luci_interpreter/SimpleMemoryManager.h" #include <kernels/Add.h> #include <kernels/ArgMax.h> @@ -68,6 +69,7 @@ #include <kernels/Softmax.h> #include <kernels/SpaceToDepth.h> #include <kernels/Split.h> +#include <kernels/SplitV.h> #include <kernels/Sqrt.h> #include <kernels/SquaredDifference.h> #include <kernels/Squeeze.h> @@ -91,6 +93,9 @@ class KernelBuilderTest : public Test { protected: luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); } + void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); } + + std::unique_ptr<IMemoryManager> _memory_manager; template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args) { @@ -114,10 +119,11 @@ protected: { std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph; - RuntimeGraph runtime_graph(nullptr); + RuntimeGraph runtime_graph(nullptr, _memory_manager.get()); + graph_to_runtime_graph[&_graph] = &runtime_graph; RuntimeToIR runtime_to_ir; GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph, - _node_to_tensor); + _node_to_tensor, _memory_manager.get()); graph_loader.loadTensors(); KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor); @@ -1091,6 +1097,31 @@ TEST_F(KernelBuilderTest, Split) checkTensor(kernel->output(1), output2); } +TEST_F(KernelBuilderTest, SplitV) +{ + auto *input = createInputNode(); + auto *size_splits = createInputNode(); + auto *axis = createInputNode(); + auto *op = createNode<luci::CircleSplitV>(); + auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0); + auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1); + + op->input(input); + op->size_splits(size_splits); + op->split_dim(axis); + + op->num_split(2); + + auto kernel 
= buildKernel<kernels::SplitV>(op); + ASSERT_THAT(kernel, NotNull()); + + checkTensor(kernel->input(), input); + checkTensor(kernel->size_splits(), size_splits); + checkTensor(kernel->axis(), axis); + checkTensor(kernel->output(0), output0); + checkTensor(kernel->output(1), output1); +} + TEST_F(KernelBuilderTest, Sqrt) { auto *input = createInputNode(); diff --git a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h index 4517d1f19..d6fb253b1 100644 --- a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h +++ b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h @@ -39,7 +39,7 @@ public: { } -protected: +public: const Tensor *getInputTensor(const loco::Node *node) const; const Tensor *getOptionalInputTensor(const loco::Node *node) const; @@ -48,7 +48,7 @@ protected: RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const; -protected: +public: const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const { return _graph_to_runtime_graph; @@ -64,6 +64,21 @@ private: const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor; }; +template <typename CircleNodeOut> +std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node) +{ + std::vector<const CircleNodeOut *> output_nodes; + for (const loco::Node *loco_node : loco::succs(node)) + { + output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node)); + } + std::sort(output_nodes.begin(), output_nodes.end(), + [](const CircleNodeOut *node1, const CircleNodeOut *node2) { + return node1->index() < node2->index(); + }); + return {output_nodes.cbegin(), output_nodes.cend()}; +} + } // namespace luci_interpreter #endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp index ff211bf09..2f278b087 100644 --- 
a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp +++ b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp @@ -23,9 +23,10 @@ namespace luci_interpreter ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, RuntimeToIR &runtime_to_ir, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor) + std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager) : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir), - _node_to_tensor(node_to_tensor) + _node_to_tensor(node_to_tensor), _memory_manager(memory_manager) { } @@ -35,14 +36,14 @@ void ModuleLoader::load() // process for control flow nodes. for (size_t i = 0; i < _module->size(); ++i) { - _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph()); + _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager)); } for (size_t i = 0; i < _module->size(); ++i) { const loco::Graph *graph = _module->graph(i); RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph); GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph, - _node_to_tensor); + _node_to_tensor, _memory_manager); loader.loadTensors(); loader.initInputOutputTensors(); loader.loadOperators(); diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-interpreter/src/loader/ModuleLoader.h index 1af0ed747..11326a2ee 100644 --- a/compiler/luci-interpreter/src/loader/ModuleLoader.h +++ b/compiler/luci-interpreter/src/loader/ModuleLoader.h @@ -19,6 +19,7 @@ #include "core/RuntimeModule.h" #include "loader/RuntimeToIR.h" +#include "luci_interpreter/MemoryManager.h" #include <luci/IR/Module.h> @@ -32,11 +33,13 @@ class ModuleLoader public: ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module, RuntimeToIR &runtime_to_ir, - std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor); + std::unordered_map<const loco::Node 
*, Tensor *> &node_to_tensor, + IMemoryManager *memory_manager); void load(); private: + IMemoryManager *_memory_manager; const luci::Module *_module; RuntimeModule *_runtime_module; RuntimeToIR &_runtime_to_ir; diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp new file mode 100644 index 000000000..decccaa1d --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Add.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleAdd *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + AddParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Add>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp new file mode 100644 index 000000000..0ee367748 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/ArgMax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleArgMax *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axis = helper.getInputTensor(node->dimension()); + Tensor *output = helper.getOutputTensor(node); + + ArgMaxParams params{}; + params.output_type = node->output_type(); + + return std::make_unique<kernels::ArgMax>(input, axis, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp new file mode 100644 index 000000000..5bc37bd4a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/AveragePool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleAveragePool2D *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::AveragePool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp new file mode 100644 index 000000000..33d0e2db6 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/BatchToSpaceND.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleBatchToSpaceND *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *block_shape = helper.getInputTensor(node->block_shape()); + const Tensor *crops = helper.getInputTensor(node->crops()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-interpreter/src/loader/nodes/Builders.h new file mode 100644 index 000000000..eab284008 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Builders.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H +#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H + +#include "loader/KernelBuilderHelper.h" + +#include "luci/IR/CircleNodes.h" + +namespace luci_interpreter +{ + +#define REGISTER_KERNEL(name) \ + std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \ + KernelBuilderHelper &helper); + +#include "KernelsToBuild.lst" + +#undef REGISTER_KERNEL + +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp new file mode 100644 index 000000000..21ea5ceab --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Cast.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleCast *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Cast>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp new file mode 100644 index 000000000..7823a9967 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Concatenation.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleConcatenation *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + std::vector<const Tensor *> inputs(node->numValues()); + for (uint32_t i = 0; i < node->numValues(); ++i) + { + inputs[i] = helper.getInputTensor(node->values(i)); + } + Tensor *output = helper.getOutputTensor(node); + + ConcatenationParams params{}; + params.axis = node->axis(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp new file mode 100644 index 000000000..71c8ef3e4 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Conv2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleConv2D *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + auto im2col = + std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, ""); + im2col->set_observable(false); + im2col->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col)); + + Conv2DParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp new file mode 100644 index 000000000..0310fb23f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/DepthToSpace.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleDepthToSpace *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + DepthToSpaceParams params{}; + params.block_size = node->block_size(); + + return std::make_unique<kernels::DepthToSpace>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp new file mode 100644 index 000000000..c2f0346a2 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/DepthwiseConv2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleDepthwiseConv2D *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *bias = helper.getInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + DepthwiseConv2DParams params{}; + params.padding = node->padding(); + params.depth_multiplier = node->depthMultiplier(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.dilation_height_factor = node->dilation()->h(); + params.dilation_width_factor = node->dilation()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp new file mode 100644 index 000000000..56c2e98f2 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Div.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleDiv *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + DivParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Div>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp new file mode 100644 index 000000000..98ee78be7 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Elu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleElu *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Elu>(input, output); +} +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp new file mode 100644 index 000000000..649d9bfe9 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Equal.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) + +{ + const auto *node = dynamic_cast<const luci::CircleEqual *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Equal>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp new file mode 100644 index 000000000..411d142c3 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Exp.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleExp *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Exp>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp new file mode 100644 index 000000000..6d8435f6c --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Floor.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleFloor *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Floor>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp new file mode 100644 index 000000000..cae2e186e --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FloorDiv.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleFloorDiv *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::FloorDiv>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp new file mode 100644 index 000000000..2917598fc --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/FullyConnected.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleFullyConnected *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *weights = helper.getInputTensor(node->weights()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + Tensor *output = helper.getOutputTensor(node); + + FullyConnectedParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp new file mode 100644 index 000000000..3db11b840 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Greater.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleGreater *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Greater>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp new file mode 100644 index 000000000..dbe051d67 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/GreaterEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleGreaterEqual *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::GreaterEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp new file mode 100644 index 000000000..5983f4d3b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/If.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleIf *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node); + assert(node->arity() == 1 + node->input_count()); + assert(output_nodes.size() == static_cast<size_t>(node->output_count())); + + const Tensor *cond = helper.getInputTensor(node->cond()); + std::vector<const Tensor *> inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph()); + RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph()); + + return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph, + else_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp new file mode 100644 index 000000000..0a8fb85e2 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/InstanceNorm.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleInstanceNorm *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 3); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *gamma = helper.getInputTensor(node->gamma()); + const Tensor *beta = helper.getInputTensor(node->beta()); + + Tensor *output = helper.getOutputTensor(node); + + InstanceNormParams params{}; + params.epsilon = node->epsilon(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp new file mode 100644 index 000000000..05f920266 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Normalize.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleL2Normalize *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + L2NormParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::L2Normalize>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp new file mode 100644 index 000000000..0e70afafa --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/L2Pool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleL2Pool2D *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::L2Pool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp new file mode 100644 index 000000000..7b229ad0e --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LeakyRelu.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLeakyRelu *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->features()); + Tensor *output = helper.getOutputTensor(node); + + LeakyReluParams params{}; + params.alpha = node->alpha(); + + return std::make_unique<kernels::LeakyRelu>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp new file mode 100644 index 000000000..81156f275 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Less.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLess *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Less>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp new file mode 100644 index 000000000..82141e5ae --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LessEqual.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLessEqual *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *x = helper.getInputTensor(node->x()); + const Tensor *y = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LessEqual>(x, y, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp new file mode 100644 index 000000000..a12dce0a0 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LocalResponseNormalization.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> +build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLocalResponseNormalization *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + LocalResponseNormalizationParams params{}; + params.radius = node->radius(); + params.bias = node->bias(); + params.alpha = node->alpha(); + params.beta = node->beta(); + + return std::make_unique<kernels::LocalResponseNormalization>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp new file mode 100644 index 000000000..6cf547aae --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogSoftmax.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLogSoftmax *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->logits()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogSoftmax>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp new file mode 100644 index 000000000..2c9549f71 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalAnd.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLogicalAnd *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalAnd>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp new file mode 100644 index 000000000..3d327d6c4 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalNot.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLogicalNot *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalNot>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp new file mode 100644 index 000000000..50566bb30 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/LogicalOr.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLogicalOr *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::LogicalOr>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp new file mode 100644 index 000000000..e4160edb3 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Logistic.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleLogistic *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Logistic>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp new file mode 100644 index 000000000..914f22838 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/MaxPool2D.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleMaxPool2D *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->value()); + Tensor *output = helper.getOutputTensor(node); + + Pool2DParams params{}; + params.padding = node->padding(); + params.filter_height = node->filter()->h(); + params.filter_width = node->filter()->w(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::MaxPool2D>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp new file mode 100644 index 000000000..dc50d6773 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Maximum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleMaximum *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Maximum>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp new file mode 100644 index 000000000..97d91207f --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Mean.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleMean *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *axes = helper.getInputTensor(node->reduction_indices()); + Tensor *output = helper.getOutputTensor(node); + + auto temp_index_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + temp_index_unique->set_observable(false); + temp_index_unique->set_data_buffer(nullptr); + Tensor *temp_index = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique)); + + auto resolved_axes_unique = + std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, ""); + resolved_axes_unique->set_observable(false); + resolved_axes_unique->set_data_buffer(nullptr); + Tensor *resolved_axes = + helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique)); + + auto temp_sum_unique = + std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, ""); + temp_sum_unique->set_observable(false); + temp_sum_unique->set_data_buffer(nullptr); + Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique)); + + ReducerParams params{}; + params.keep_dims = node->keep_dims(); + + return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum, + params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp new file mode 100644 index 000000000..ff659524a --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp @@ -0,0 +1,39 @@ +/* 
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Minimum.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleMinimum *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Minimum>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp new file mode 100644 index 000000000..ebf294583 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/MirrorPad.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleMirrorPad *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *paddings = helper.getInputTensor(node->paddings()); + Tensor *output = helper.getOutputTensor(node); + + MirrorPadParams params{}; + params.mode = node->mode(); + + return std::make_unique<kernels::MirrorPad>(input, paddings, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp new file mode 100644 index 000000000..4f9da967d --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Mul.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleMul *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + MulParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Mul>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp new file mode 100644 index 000000000..23c00537b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Neg.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleNeg *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Neg>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp new file mode 100644 index 000000000..8e5711fc1 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::NotEqual instance for a luci::CircleNotEqual node.
+ *
+ * The tensors for operands x() and y() and for the node's output are looked up through
+ * @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleNotEqual
+ */
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const luci::CircleNotEqual *not_equal = dynamic_cast<const luci::CircleNotEqual *>(circle_node);
+  if (!not_equal)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(not_equal->arity() == 2);
+
+  const Tensor *lhs = helper.getInputTensor(not_equal->x());
+  const Tensor *rhs = helper.getInputTensor(not_equal->y());
+  Tensor *result = helper.getOutputTensor(not_equal);
+
+  return std::make_unique<kernels::NotEqual>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644
index 000000000..e31601bf6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::PRelu instance for a luci::CirclePRelu node.
+ *
+ * The tensors for operands input() and alpha() and for the node's output are looked up
+ * through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CirclePRelu
+ */
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const luci::CirclePRelu *prelu = dynamic_cast<const luci::CirclePRelu *>(circle_node);
+  if (!prelu)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(prelu->arity() == 2);
+
+  const Tensor *src = helper.getInputTensor(prelu->input());
+  const Tensor *alpha_tensor = helper.getInputTensor(prelu->alpha());
+  Tensor *result = helper.getOutputTensor(prelu);
+
+  return std::make_unique<kernels::PRelu>(src, alpha_tensor, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644
index 000000000..699472081
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Pack instance for a luci::CirclePack node.
+ *
+ * One input tensor is looked up through @p helper for every values(i) operand, in index
+ * order; the node's axis() and values_count() are forwarded in PackParams.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CirclePack
+ */
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const luci::CirclePack *pack = dynamic_cast<const luci::CirclePack *>(circle_node);
+  if (!pack)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: the operand count must equal values_count().
+  assert(pack->arity() == pack->values_count());
+
+  // Pre-sized to values_count(); every slot is overwritten by the loop below.
+  std::vector<const Tensor *> packed_inputs(pack->values_count());
+  for (uint32_t idx = 0; idx < pack->values_count(); ++idx)
+  {
+    packed_inputs[idx] = helper.getInputTensor(pack->values(idx));
+  }
+  Tensor *result = helper.getOutputTensor(pack);
+
+  PackParams pack_params{};
+  pack_params.axis = pack->axis();
+  pack_params.values_count = pack->values_count();
+
+  return std::make_unique<kernels::Pack>(std::move(packed_inputs), result, pack_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644
index 000000000..770549295
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Pad instance for a luci::CirclePad node.
+ *
+ * The tensors for operands input() and paddings() and for the node's output are looked up
+ * through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CirclePad
+ */
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const luci::CirclePad *pad = dynamic_cast<const luci::CirclePad *>(circle_node);
+  if (!pad)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(pad->arity() == 2);
+
+  const Tensor *src = helper.getInputTensor(pad->input());
+  const Tensor *paddings_tensor = helper.getInputTensor(pad->paddings());
+  Tensor *result = helper.getOutputTensor(pad);
+
+  return std::make_unique<kernels::Pad>(src, paddings_tensor, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644
index 000000000..12deb15f0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::PadV2 instance for a luci::CirclePadV2 node.
+ *
+ * The tensors for operands input(), paddings() and constant_values() and for the node's
+ * output are looked up through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CirclePadV2
+ */
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const luci::CirclePadV2 *pad = dynamic_cast<const luci::CirclePadV2 *>(circle_node);
+  if (!pad)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly three operands are expected.
+  assert(pad->arity() == 3);
+
+  const Tensor *src = helper.getInputTensor(pad->input());
+  const Tensor *paddings_tensor = helper.getInputTensor(pad->paddings());
+  const Tensor *constants = helper.getInputTensor(pad->constant_values());
+  Tensor *result = helper.getOutputTensor(pad);
+
+  return std::make_unique<kernels::PadV2>(src, paddings_tensor, constants, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644
index 000000000..b430bc94f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Pow instance for a luci::CirclePow node.
+ *
+ * The tensors for operands x() and y() and for the node's output are looked up through
+ * @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CirclePow
+ */
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const luci::CirclePow *pow_node = dynamic_cast<const luci::CirclePow *>(circle_node);
+  if (!pow_node)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(pow_node->arity() == 2);
+
+  const Tensor *lhs = helper.getInputTensor(pow_node->x());
+  const Tensor *rhs = helper.getInputTensor(pow_node->y());
+  Tensor *result = helper.getOutputTensor(pow_node);
+
+  return std::make_unique<kernels::Pow>(lhs, rhs, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644
index 000000000..d53a66a06
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Relu instance for a luci::CircleRelu node.
+ *
+ * The tensor for operand features() and the node's output tensor are looked up through
+ * @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleRelu
+ */
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const luci::CircleRelu *relu = dynamic_cast<const luci::CircleRelu *>(circle_node);
+  if (!relu)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: a single operand is expected.
+  assert(relu->arity() == 1);
+
+  const Tensor *features = helper.getInputTensor(relu->features());
+  Tensor *result = helper.getOutputTensor(relu);
+
+  return std::make_unique<kernels::Relu>(features, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644
index 000000000..f1b5d219b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Relu6 instance for a luci::CircleRelu6 node.
+ *
+ * The tensor for operand features() and the node's output tensor are looked up through
+ * @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleRelu6
+ */
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const luci::CircleRelu6 *relu6 = dynamic_cast<const luci::CircleRelu6 *>(circle_node);
+  if (!relu6)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: a single operand is expected.
+  assert(relu6->arity() == 1);
+
+  const Tensor *features = helper.getInputTensor(relu6->features());
+  Tensor *result = helper.getOutputTensor(relu6);
+
+  return std::make_unique<kernels::Relu6>(features, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644
index 000000000..89e3ecebf
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Reshape instance for a luci::CircleReshape node.
+ *
+ * The tensors for operands tensor() and shape() and for the node's output are looked up
+ * through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleReshape
+ */
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const luci::CircleReshape *reshape = dynamic_cast<const luci::CircleReshape *>(circle_node);
+  if (!reshape)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(reshape->arity() == 2);
+
+  const Tensor *src = helper.getInputTensor(reshape->tensor());
+  const Tensor *shape_tensor = helper.getInputTensor(reshape->shape());
+  Tensor *result = helper.getOutputTensor(reshape);
+
+  // NOTE The node's 'newShape' attribute is not read here; only the shape() operand is used.
+  return std::make_unique<kernels::Reshape>(src, shape_tensor, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644
index 000000000..dca56588d
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::ResizeBilinear instance for a luci::CircleResizeBilinear node.
+ *
+ * The tensors for operands input() and size() and for the node's output are looked up
+ * through @p helper; align_corners() and half_pixel_centers() are forwarded in
+ * ResizeBilinearParams.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleResizeBilinear
+ */
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+                                                          KernelBuilderHelper &helper)
+{
+  const luci::CircleResizeBilinear *resize =
+    dynamic_cast<const luci::CircleResizeBilinear *>(circle_node);
+  if (!resize)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(resize->arity() == 2);
+
+  const Tensor *src = helper.getInputTensor(resize->input());
+  const Tensor *size_tensor = helper.getInputTensor(resize->size());
+  Tensor *result = helper.getOutputTensor(resize);
+
+  ResizeBilinearParams resize_params{};
+  resize_params.align_corners = resize->align_corners();
+  resize_params.half_pixel_centers = resize->half_pixel_centers();
+
+  return std::make_unique<kernels::ResizeBilinear>(src, size_tensor, result, resize_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..d1ea19c0f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::ResizeNearestNeighbor instance for a
+ *        luci::CircleResizeNearestNeighbor node.
+ *
+ * The tensors for operands input() and size() and for the node's output are looked up
+ * through @p helper. align_corners() is forwarded in ResizeNearestNeighborParams, while
+ * half_pixel_centers is always set to false (see the TODO in the body).
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleResizeNearestNeighbor
+ */
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+                                         KernelBuilderHelper &helper)
+{
+  const luci::CircleResizeNearestNeighbor *resize =
+    dynamic_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+  if (!resize)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(resize->arity() == 2);
+
+  const Tensor *src = helper.getInputTensor(resize->input());
+  const Tensor *size_tensor = helper.getInputTensor(resize->size());
+  Tensor *result = helper.getOutputTensor(resize);
+
+  ResizeNearestNeighborParams resize_params{};
+  resize_params.align_corners = resize->align_corners();
+  // TODO Take half_pixel_centers from the node once CircleResizeNearestNeighbor has it.
+  //      CircleResizeNearestNeighbor currently has no half_pixel_centers attribute, so the
+  //      current default value, false, is used until the node is updated.
+  resize_params.half_pixel_centers = false;
+
+  return std::make_unique<kernels::ResizeNearestNeighbor>(src, size_tensor, result,
+                                                          resize_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644
index 000000000..ea00f5408
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::ReverseV2 instance for a luci::CircleReverseV2 node.
+ *
+ * The tensors for operands tensor() and axis() and for the node's output are looked up
+ * through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleReverseV2
+ */
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const luci::CircleReverseV2 *reverse = dynamic_cast<const luci::CircleReverseV2 *>(circle_node);
+  if (!reverse)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly two operands are expected.
+  assert(reverse->arity() == 2);
+
+  const Tensor *src = helper.getInputTensor(reverse->tensor());
+  const Tensor *axis_tensor = helper.getInputTensor(reverse->axis());
+  Tensor *result = helper.getOutputTensor(reverse);
+
+  return std::make_unique<kernels::ReverseV2>(src, axis_tensor, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644
index 000000000..ff87f435c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Rsqrt instance for a luci::CircleRsqrt node.
+ *
+ * The tensor for operand x() and the node's output tensor are looked up through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleRsqrt
+ */
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const luci::CircleRsqrt *rsqrt = dynamic_cast<const luci::CircleRsqrt *>(circle_node);
+  if (!rsqrt)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: a single operand is expected.
+  assert(rsqrt->arity() == 1);
+
+  const Tensor *operand = helper.getInputTensor(rsqrt->x());
+  Tensor *result = helper.getOutputTensor(rsqrt);
+
+  return std::make_unique<kernels::Rsqrt>(operand, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644
index 000000000..741cd0806
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Slice instance for a luci::CircleSlice node.
+ *
+ * The tensors for operands input(), begin() and size() and for the node's output are
+ * looked up through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSlice
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const luci::CircleSlice *slice = dynamic_cast<const luci::CircleSlice *>(circle_node);
+  if (!slice)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly three operands are expected.
+  assert(slice->arity() == 3);
+
+  const Tensor *src = helper.getInputTensor(slice->input());
+  const Tensor *begin_tensor = helper.getInputTensor(slice->begin());
+  const Tensor *size_tensor = helper.getInputTensor(slice->size());
+  Tensor *result = helper.getOutputTensor(slice);
+
+  return std::make_unique<kernels::Slice>(src, begin_tensor, size_tensor, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644
index 000000000..b15e4b6f3
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Softmax instance for a luci::CircleSoftmax node.
+ *
+ * The tensor for operand logits() and the node's output tensor are looked up through
+ * @p helper; the node's beta() is forwarded in SoftmaxParams.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSoftmax
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const luci::CircleSoftmax *softmax = dynamic_cast<const luci::CircleSoftmax *>(circle_node);
+  if (!softmax)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: a single operand is expected.
+  assert(softmax->arity() == 1);
+
+  const Tensor *logits = helper.getInputTensor(softmax->logits());
+  Tensor *result = helper.getOutputTensor(softmax);
+
+  SoftmaxParams softmax_params{};
+  softmax_params.beta = softmax->beta();
+
+  return std::make_unique<kernels::Softmax>(logits, result, softmax_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644
index 000000000..91c237aa5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::SpaceToBatchND instance for a luci::CircleSpaceToBatchND node.
+ *
+ * The tensors for operands input(), block_shape() and paddings() and for the node's output
+ * are looked up through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSpaceToBatchND
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+                                                          KernelBuilderHelper &helper)
+{
+  const luci::CircleSpaceToBatchND *s2b =
+    dynamic_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+  if (!s2b)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: exactly three operands are expected.
+  assert(s2b->arity() == 3);
+
+  const Tensor *src = helper.getInputTensor(s2b->input());
+  const Tensor *block_shape_tensor = helper.getInputTensor(s2b->block_shape());
+  const Tensor *paddings_tensor = helper.getInputTensor(s2b->paddings());
+  Tensor *result = helper.getOutputTensor(s2b);
+
+  return std::make_unique<kernels::SpaceToBatchND>(src, block_shape_tensor, paddings_tensor,
+                                                   result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644
index 000000000..3cbbd9718
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::SpaceToDepth instance for a luci::CircleSpaceToDepth node.
+ *
+ * The tensor for operand input() and the node's output tensor are looked up through
+ * @p helper; the node's block_size() is forwarded in SpaceToDepthParams.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSpaceToDepth
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+                                                        KernelBuilderHelper &helper)
+{
+  const luci::CircleSpaceToDepth *s2d =
+    dynamic_cast<const luci::CircleSpaceToDepth *>(circle_node);
+  if (!s2d)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: a single operand is expected.
+  assert(s2d->arity() == 1);
+
+  const Tensor *src = helper.getInputTensor(s2d->input());
+  Tensor *result = helper.getOutputTensor(s2d);
+
+  SpaceToDepthParams s2d_params{};
+  s2d_params.block_size = s2d->block_size();
+
+  return std::make_unique<kernels::SpaceToDepth>(src, result, s2d_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644
index 000000000..32553ad5e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Split instance for a luci::CircleSplit node.
+ *
+ * The node's luci::CircleSplitOut outputs are gathered with collectOutputNodes; the tensors
+ * for operands split_dim() and input() and for those outputs are looked up through
+ * @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSplit
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const luci::CircleSplit *split = dynamic_cast<const luci::CircleSplit *>(circle_node);
+  if (!split)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  auto split_outs = collectOutputNodes<luci::CircleSplitOut>(split);
+  // Checked by assert only: two operands, and one collected output per num_split().
+  assert(split->arity() == 2);
+  assert(split_outs.size() == static_cast<size_t>(split->num_split()));
+
+  const Tensor *axis_tensor = helper.getInputTensor(split->split_dim());
+  const Tensor *src = helper.getInputTensor(split->input());
+  std::vector<Tensor *> results = helper.getOutputTensors(split_outs);
+
+  // NOTE 'num_split' is not passed to the kernel; it is only read by the assert above.
+  return std::make_unique<kernels::Split>(axis_tensor, src, std::move(results));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644
index 000000000..d78816447
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::SplitV instance for a luci::CircleSplitV node.
+ *
+ * The node's luci::CircleSplitVOut outputs are gathered with collectOutputNodes; the tensors
+ * for operands input(), size_splits() and split_dim() and for those outputs are looked up
+ * through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSplitV
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const luci::CircleSplitV *split_v = dynamic_cast<const luci::CircleSplitV *>(circle_node);
+  if (!split_v)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  auto split_outs = collectOutputNodes<luci::CircleSplitVOut>(split_v);
+  // Checked by assert only: three operands, and one collected output per num_split().
+  assert(split_v->arity() == 3);
+  assert(split_outs.size() == static_cast<size_t>(split_v->num_split()));
+
+  const Tensor *src = helper.getInputTensor(split_v->input());
+  const Tensor *size_splits_tensor = helper.getInputTensor(split_v->size_splits());
+  const Tensor *axis_tensor = helper.getInputTensor(split_v->split_dim());
+  std::vector<Tensor *> results = helper.getOutputTensors(split_outs);
+
+  // NOTE 'num_split' is not passed to the kernel; it is only read by the assert above.
+  return std::make_unique<kernels::SplitV>(src, size_splits_tensor, axis_tensor,
+                                           std::move(results));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644
index 000000000..56dd986f1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the kernels::Sqrt instance for a luci::CircleSqrt node.
+ *
+ * The tensor for operand x() and the node's output tensor are looked up through @p helper.
+ *
+ * @throws std::runtime_error if @p circle_node is not a luci::CircleSqrt
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const luci::CircleSqrt *sqrt_node = dynamic_cast<const luci::CircleSqrt *>(circle_node);
+  if (!sqrt_node)
+  {
+    throw std::runtime_error("wrong builder for operation");
+  }
+  // Checked by assert only: a single operand is expected.
+  assert(sqrt_node->arity() == 1);
+
+  const Tensor *operand = helper.getInputTensor(sqrt_node->x());
+  Tensor *result = helper.getOutputTensor(sqrt_node);
+
+  return std::make_unique<kernels::Sqrt>(operand, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644
index 000000000..43aadb969
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "Builders.h" + +#include "kernels/Square.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleSquare *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Square>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp new file mode 100644 index 000000000..6a2717aa2 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/SquaredDifference.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleSquaredDifference *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::SquaredDifference>(input1, input2, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp new file mode 100644 index 000000000..583ff9314 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Squeeze.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleSqueeze *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->input()); + Tensor *output = helper.getOutputTensor(node); + + SqueezeParams params{}; + params.squeeze_dims = node->squeeze_dims(); + + return std::make_unique<kernels::Squeeze>(input, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp new file mode 100644 index 000000000..fe5fa7707 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/StridedSlice.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleStridedSlice *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 4); + + const Tensor *input = helper.getInputTensor(node->input()); + const Tensor *begin = helper.getInputTensor(node->begin()); + const Tensor *end = helper.getInputTensor(node->end()); + const Tensor *strides = helper.getInputTensor(node->strides()); + + Tensor *output = helper.getOutputTensor(node); + + StridedSliceParams params{}; + params.begin_mask = node->begin_mask(); + params.ellipsis_mask = node->ellipsis_mask(); + params.end_mask = node->end_mask(); + params.new_axis_mask = node->new_axis_mask(); + params.shrink_axis_mask = node->shrink_axis_mask(); + + return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp new file mode 100644 index 000000000..bad4fbb13 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Sub.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleSub *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input1 = helper.getInputTensor(node->x()); + const Tensor *input2 = helper.getInputTensor(node->y()); + Tensor *output = helper.getOutputTensor(node); + + SubParams params{}; + params.activation = node->fusedActivationFunction(); + + return std::make_unique<kernels::Sub>(input1, input2, output, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp new file mode 100644 index 000000000..f4255291b --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Tanh.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleTanh *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 1); + + const Tensor *input = helper.getInputTensor(node->x()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Tanh>(input, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp new file mode 100644 index 000000000..4e095fbbc --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/Transpose.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleTranspose *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 2); + + const Tensor *input = helper.getInputTensor(node->a()); + const Tensor *perm = helper.getInputTensor(node->perm()); + Tensor *output = helper.getOutputTensor(node); + + return std::make_unique<kernels::Transpose>(input, perm, output); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp new file mode 100644 index 000000000..1b954c35c --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Builders.h" + +#include "kernels/TransposeConv.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleTransposeConv *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + assert(node->arity() == 4); + + const Tensor *input_sizes = helper.getInputTensor(node->inputSizes()); + const Tensor *filter = helper.getInputTensor(node->filter()); + const Tensor *out_backprop = helper.getInputTensor(node->outBackprop()); + const Tensor *bias = helper.getOptionalInputTensor(node->bias()); + + Tensor *output = helper.getOutputTensor(node); + + DataType scratch_data_type = + helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32; + + auto scratch_tensor = + std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, ""); + scratch_tensor->set_observable(false); + scratch_tensor->set_data_buffer(nullptr); + Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor)); + + TransposeConvParams params{}; + params.padding = node->padding(); + params.stride_height = node->stride()->h(); + params.stride_width = node->stride()->w(); + + return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output, + tmp, params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp new file mode 100644 index 000000000..978c738c6 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/Unpack.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleUnpack *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node); + assert(node->arity() == 1); + assert(output_nodes.size() == static_cast<size_t>(node->num())); + + const Tensor *input = helper.getInputTensor(node->value()); + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + UnpackParams params{}; + params.axis = node->axis(); + + // NOTE 'num' attribute is ignored. + return std::make_unique<kernels::Unpack>(input, std::move(outputs), params); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp new file mode 100644 index 000000000..284dc0c68 --- /dev/null +++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Builders.h" + +#include "kernels/While.h" + +namespace luci_interpreter +{ + +std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node, + KernelBuilderHelper &helper) +{ + const auto *node = dynamic_cast<const luci::CircleWhile *>(circle_node); + if (node == nullptr) + throw std::runtime_error("wrong builder for operation"); + + auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node); + assert(node->arity() == node->input_count()); + assert(output_nodes.size() == static_cast<size_t>(node->output_count())); + + std::vector<const Tensor *> inputs(node->input_count()); + for (uint32_t i = 0; i < node->input_count(); ++i) + { + inputs[i] = helper.getInputTensor(node->input(i)); + } + std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes); + + RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph()); + RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph()); + + return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph, + body_graph); +} + +} // namespace luci_interpreter diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt new file mode 100644 index 000000000..d936e12ba --- /dev/null +++ b/compiler/luci-micro/CMakeLists.txt @@ -0,0 +1,57 @@ +set(ARM_C_COMPILER "arm-none-eabi-gcc") +set(ARM_ASM_COMPILER "arm-none-eabi-gcc") +set(ARM_CXX_COMPILER "arm-none-eabi-g++") +set(ARM_OBJCOPY "arm-none-eabi-objcopy") + +find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER}) + +if(NOT ARM_C_COMPILER_PATH) + 
message(WARNING "ARM compiler is NOT FOUND, skipping luci-micro build") + return() +endif() + +set(CMAKE_ARM_OPTIONS + -DLUCI_INTERPRETER_STATIC=ON + -DLUCI_STATIC=ON + "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/standalone/Toolchain.cmake" + "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu" + "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}" + "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}" + -DCPU_ARCH=arm + -DC_COMPILER=${ARM_C_COMPILER} + -DCXX_COMPILER=${ARM_CXX_COMPILER} + -DASM_COMPILER=${ARM_ASM_COMPILER} + -DOBJCOPY=${ARM_OBJCOPY} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DENABLE_TEST=OFF + -DBUILD_GTEST=OFF + "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}" + -DENABLE_STRICT_BUILD=OFF +) + +set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm") +file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}") + +set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt") + +add_custom_command( + OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}" + COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS} + WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt" + VERBATIM +) + +add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}") + +set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/compiler/luci-interpreter/src/libluci_interpreter.a") + +add_custom_command( + OUTPUT "${MICRO_ARM_BINARY}" + COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter -j ${CPU_COUNT} + WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}" + DEPENDS luci_interpreter_micro_arm_cmake + VERBATIM +) + +add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}") diff --git a/compiler/luci-micro/README.md b/compiler/luci-micro/README.md new file mode 100644 index 000000000..6641ad7a7 --- /dev/null +++ b/compiler/luci-micro/README.md @@ -0,0 +1,56 @@ +# luci-micro + +`luci-micro` is MCU specialized build of luci-interpreter with several 
benchmark applications. + +## Contents + +Luci-micro contains cmake infrastructure to build: +- stand-alone interpreter library +- benchmark applications using luci interpreter on arm MCUs + +## How to build stand alone library + +Stand-alone library is simply built by `luci_interpreter_micro_arm` target. +Result library will be placed in `<ONE root>/build/compiler/luci-micro/standalone_arm/luci-interpreter/src/libluci_interpreter.a`. + +### Prerequisites + +- Everything you need for ONE project: see [how-to-build-compiler.md](../../docs/howto/how-to-build-compiler.md) +- arm-none-eabi-gcc and arm-none-eabi-g++ compilers + +To install needed arm compilers on ubuntu: +``` +$ sudo apt-get install gcc-arm-none-eabi +``` + +**cmake build** + +``` bash +$ cd <path to ONE> +$ mkdir build +# cd build +$ cmake ../infra/nncc +$ make -j$(nproc) luci_interpreter_micro_arm +``` + +**nncc script build** + +``` bash +$ cd <path to ONE> +$ ./nncc configure +$ ./nncc build -j$(nproc) luci_interpreter_micro_arm +``` + +### Known issues + +Interpreter uses TensorFlow headers that produces warnings. + +`Linux` x86 build uses "-isystem" flag to suppress warnings from external sources, +but some old arm compilers have issues with it: +[bug](https://bugs.launchpad.net/gcc-arm-embedded/+bug/1698539) + +`-isystem` hack is disabled for MCU build, because of this MCU build is broken if `-Werror` flag is set. 
+ +## How to use + +TBD diff --git a/compiler/luci-micro/requires.cmake b/compiler/luci-micro/requires.cmake new file mode 100644 index 000000000..5913aa9ad --- /dev/null +++ b/compiler/luci-micro/requires.cmake @@ -0,0 +1 @@ +require(luci-interpreter) diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt new file mode 100644 index 000000000..7953359ad --- /dev/null +++ b/compiler/luci-micro/standalone/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.10) +project(luci_interpreter_micro_standalone) + +# Add fake target, so nothing is build +set(BUILD_WHITELIST "dummy") + +add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc) + +set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler") + +add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv) +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang) + +add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter) diff --git a/compiler/luci-micro/standalone/Toolchain.cmake b/compiler/luci-micro/standalone/Toolchain.cmake new file mode 100644 index 000000000..2d23b5de5 --- /dev/null +++ b/compiler/luci-micro/standalone/Toolchain.cmake @@ -0,0 +1,8 @@ +set(CMAKE_SYSTEM_NAME Generic) + +set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +set(CMAKE_C_COMPILER "${C_COMPILER}") 
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}") +set(CMAKE_ASM_COMPILER "${ASM_COMPILER}") +set(CMAKE_OBJCOPY "${OBJCOPY}") diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt index 2d2befe57..b31415870 100644 --- a/compiler/luci-pass-value-test/CMakeLists.txt +++ b/compiler/luci-pass-value-test/CMakeLists.txt @@ -38,7 +38,7 @@ add_test(NAME luci_pass_value_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh" "${CMAKE_CURRENT_BINARY_DIR}" "${ARTIFACTS_BIN_PATH}" - "${NNCC_OVERLAY_DIR}/venv_2_3_0" + "${NNCC_OVERLAY_DIR}/venv_2_6_0" "$<TARGET_FILE:luci_eval_driver>" ${LUCI_PASS_VALUE_TESTS} ) diff --git a/compiler/luci-value-test/CMakeLists.txt b/compiler/luci-value-test/CMakeLists.txt index 124f120d4..3c7185b80 100644 --- a/compiler/luci-value-test/CMakeLists.txt +++ b/compiler/luci-value-test/CMakeLists.txt @@ -18,7 +18,7 @@ add_test(NAME luci_value_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh" "${CMAKE_CURRENT_BINARY_DIR}" "${ARTIFACTS_BIN_PATH}" - "${NNCC_OVERLAY_DIR}/venv_2_3_0" + "${NNCC_OVERLAY_DIR}/venv_2_6_0" "$<TARGET_FILE:luci_eval_driver>" ${LUCI_VALUE_TESTS} ) diff --git a/compiler/luci-value-test/README.md b/compiler/luci-value-test/README.md index 90e92834b..6f1d0d54f 100644 --- a/compiler/luci-value-test/README.md +++ b/compiler/luci-value-test/README.md @@ -5,11 +5,15 @@ The test proceeds as follows Step 1: Generate tflite files and circle files from TFLite recipes (listsed in test.lst). +``` "TFLite recipe" -> tflchef -> "tflite file" -> tflite2circle -> "circle file" +``` Step 2: Run TFLite interpreter and luci-interpreter for the generated tflite and circle, respectively. (with the same input tensors filled with random values) +``` circle file -> luci-interpreter -------> Execution result 1 tflite file -> TFLite interpreter -----> Execution result 2 +``` Step 3: Compare the execution result 1 and 2. The result must be the same. 
diff --git a/compiler/luci-value-test/luci_eval_verifier.py b/compiler/luci-value-test/luci_eval_verifier.py index f6b0620d8..a76bd1403 100755 --- a/compiler/luci-value-test/luci_eval_verifier.py +++ b/compiler/luci-value-test/luci_eval_verifier.py @@ -64,41 +64,23 @@ for idx in range(len(interpreter.get_output_details())): shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r') output_shape = [int(i) for i in shape_file.read().split(',')] luci_output_data = np.reshape(output_data, output_shape) + intp_output_data = interpreter.get_tensor(output_details["index"]) try: if output_details["dtype"] == np.uint8: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) elif output_details["dtype"] == np.float32: if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=1.e-5, - atol=1.e-5) == False: + luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) elif output_details["dtype"] == np.int64: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model + " does not match with " + circle_model) elif output_details["dtype"] == np.int32: - if np.allclose( - luci_output_data, - interpreter.get_tensor( - interpreter.get_output_details()[idx]["index"]), - rtol=0, - atol=0) == False: + if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False: raise SystemExit("Execution result of " + tflite_model 
+ " does not match with " + circle_model) else: diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt index 95c349c0d..9dcf1b55d 100644 --- a/compiler/luci/CMakeLists.txt +++ b/compiler/luci/CMakeLists.txt @@ -1,3 +1,14 @@ +# Some targets do not support dynamic linking: MCU, TrustZone applications, etc. +# STATIC_LUCI option allows us to compile luci and luci related components safely +# and suppress various cmake warnings. +# +# Currently this feature is used for luci-interpreter MCU builds. +if (STATIC_LUCI) + set(LIBRARY_TYPE "STATIC") +else() + set(LIBRARY_TYPE "SHARED") +endif() + add_subdirectory(env) add_subdirectory(log) add_subdirectory(lang) @@ -6,6 +17,7 @@ add_subdirectory(testhelper) add_subdirectory(service) add_subdirectory(pass) add_subdirectory(profile) +add_subdirectory(plan) add_subdirectory(partition) add_subdirectory(import) add_subdirectory(export) diff --git a/compiler/luci/env/CMakeLists.txt b/compiler/luci/env/CMakeLists.txt index 4d1a89ad1..bba515551 100644 --- a/compiler/luci/env/CMakeLists.txt +++ b/compiler/luci/env/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_env SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_env ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_env PUBLIC include) target_link_libraries(luci_env PRIVATE nncc_common) install(TARGETS luci_env DESTINATION lib) diff --git a/compiler/luci/env/include/luci/UserSettings.h b/compiler/luci/env/include/luci/UserSettings.h index b56bd65e2..9fe9592e5 100644 --- a/compiler/luci/env/include/luci/UserSettings.h +++ b/compiler/luci/env/include/luci/UserSettings.h @@ -33,6 +33,7 @@ struct UserSettings MuteWarnings, DisableValidation, ProfilingDataGen, + ExecutionPlanGen, }; static UserSettings *settings(); diff --git a/compiler/luci/env/src/UserSettings.cpp 
b/compiler/luci/env/src/UserSettings.cpp index b4c661190..136fee799 100644 --- a/compiler/luci/env/src/UserSettings.cpp +++ b/compiler/luci/env/src/UserSettings.cpp @@ -31,6 +31,7 @@ private: bool _MuteWarnings{false}; bool _DisableValidation{false}; bool _ProfilingDataGen{false}; + bool _ExecutionPlanGen{false}; }; void UserSettingsImpl::set(const Key key, bool value) @@ -46,6 +47,9 @@ void UserSettingsImpl::set(const Key key, bool value) case Key::ProfilingDataGen: _ProfilingDataGen = value; break; + case Key::ExecutionPlanGen: + _ExecutionPlanGen = value; + break; default: throw std::runtime_error("Invalid key in boolean set"); break; @@ -62,6 +66,8 @@ bool UserSettingsImpl::get(const Key key) const return _DisableValidation; case Key::ProfilingDataGen: return _ProfilingDataGen; + case Key::ExecutionPlanGen: + return _ExecutionPlanGen; default: throw std::runtime_error("Invalid key in boolean get"); break; diff --git a/compiler/luci/env/src/UserSettings.test.cpp b/compiler/luci/env/src/UserSettings.test.cpp index 899c0c2a1..26c606edb 100644 --- a/compiler/luci/env/src/UserSettings.test.cpp +++ b/compiler/luci/env/src/UserSettings.test.cpp @@ -39,6 +39,18 @@ TEST(UserSettings, MuteWarnings) ASSERT_TRUE(settings->get(luci::UserSettings::Key::MuteWarnings)); } +TEST(UserSettings, MuteWarnings_NEG) +{ + auto settings = luci::UserSettings::settings(); + ASSERT_NE(nullptr, settings); + + settings->set(luci::UserSettings::Key::MuteWarnings, false); + ASSERT_FALSE(settings->get(luci::UserSettings::Key::MuteWarnings)); + + settings->set(luci::UserSettings::Key::MuteWarnings, true); + ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation)); +} + TEST(UserSettings, DisableValidation) { auto settings = luci::UserSettings::settings(); @@ -51,6 +63,18 @@ TEST(UserSettings, DisableValidation) ASSERT_TRUE(settings->get(luci::UserSettings::Key::DisableValidation)); } +TEST(UserSettings, DisableValidation_NEG) +{ + auto settings = luci::UserSettings::settings(); + 
ASSERT_NE(nullptr, settings); + + settings->set(luci::UserSettings::Key::DisableValidation, false); + ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation)); + + settings->set(luci::UserSettings::Key::DisableValidation, true); + ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen)); +} + TEST(UserSettings, ProfilingDataGen) { auto settings = luci::UserSettings::settings(); diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt index 5c0077625..2b41a6248 100644 --- a/compiler/luci/export/CMakeLists.txt +++ b/compiler/luci/export/CMakeLists.txt @@ -3,7 +3,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") #file(GLOB_RECURSE TESTS "src/*.test.cpp") #list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_export SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_export ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_export PRIVATE src) target_include_directories(luci_export PUBLIC include) target_link_libraries(luci_export PRIVATE luci_lang) @@ -14,6 +18,7 @@ target_link_libraries(luci_export PRIVATE luci_env) target_link_libraries(luci_export PRIVATE luci_log) target_link_libraries(luci_export PRIVATE luci_logex) target_link_libraries(luci_export PRIVATE luci_profile) +target_link_libraries(luci_export PRIVATE luci_plan) target_link_libraries(luci_export PRIVATE nncc_common) target_link_libraries(luci_export PRIVATE locop) target_link_libraries(luci_export PRIVATE oops) diff --git a/compiler/luci/export/src/CircleExportMetadata.cpp b/compiler/luci/export/src/CircleExportMetadata.cpp index ef905a882..017002f5c 100644 --- a/compiler/luci/export/src/CircleExportMetadata.cpp +++ b/compiler/luci/export/src/CircleExportMetadata.cpp @@ -44,6 +44,31 @@ flatbuffers::Offset<circle::Metadata> metadata_offset(flatbuffers::FlatBufferBui namespace luci { +// 'execution_plan_table' is encoded to binary format. 
+const std::vector<uint8_t> CircleExportMetadata::encoded_execution_plan_table() +{ + std::vector<uint8_t> data; + + write_u32(data, _execution_plan_table.size()); + + for (auto &kv : _execution_plan_table) + { + const auto id = kv.first; + write_u32(data, id); + + const auto plan_vector = kv.second; + const auto size = plan_vector.size(); + write_u32(data, size); + + for (auto elem : plan_vector) + { + write_u32(data, elem); + } + } + + return data; +} + // 'source_table' is encoded to binary format. const std::vector<uint8_t> CircleExportMetadata::encoded_source_table(void) { @@ -114,7 +139,11 @@ createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, luci::Serial metadata_vec.emplace_back( metadata_offset(builder, md, md._metadata.encoded_op_table(), "ONE_op_table")); } - + if (settings->get(luci::UserSettings::Key::ExecutionPlanGen)) + { + metadata_vec.emplace_back(metadata_offset( + builder, md, md._metadata.encoded_execution_plan_table(), "ONE_execution_plan_table")); + } return metadata_vec; } diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp index 014d9bd61..be64a52d4 100644 --- a/compiler/luci/export/src/CircleOperationExporter.cpp +++ b/compiler/luci/export/src/CircleOperationExporter.cpp @@ -22,6 +22,7 @@ #include <luci/IR/CircleNodes.h> #include <luci/IR/CircleNodeVisitor.h> #include <luci/Profile/CircleNodeOrigin.h> +#include <luci/Plan/CircleNodeExecutionPlan.h> #include <luci/UserSettings.h> #include <luci/Log.h> @@ -1684,7 +1685,7 @@ void OpExporterLet<OE::CIRC>::visit(luci::CircleInstanceNorm *node) } void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md, - SerializedGraphData &gd) + SerializedGraphData &gd, uint32_t node_position) { if (auto circle_node = dynamic_cast<luci::CircleNode *>(node)) { @@ -1702,6 +1703,19 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria 
md._metadata.add_op_table(node_id, source->id()); } } + if (has_execution_plan(circle_node)) + { + // Add to node (in node_position) metadata vector with execution_plan information: + // order of execution, and offsets output tensors. + const auto execution_plan = get_execution_plan(circle_node); + std::vector<uint32_t> execution_plan_vector; + execution_plan_vector.push_back(execution_plan.order_in_plan()); + for (auto offset : execution_plan.offsets()) + { + execution_plan_vector.push_back(offset); + } + md._metadata.add_execution_plan_table(node_position, execution_plan_vector); + } } else { @@ -1717,9 +1731,11 @@ namespace luci void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md, SerializedGraphData &gd) { + uint32_t node_position = 0; for (auto node : loco::postorder_traversal(loco::output_nodes(g))) { - exportNode(node, builder, md, gd); + exportNode(node, builder, md, gd, node_position); + node_position++; } } diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h index 95f7b5755..a945eecf7 100644 --- a/compiler/luci/export/src/SerializedData.h +++ b/compiler/luci/export/src/SerializedData.h @@ -20,6 +20,7 @@ #include <mio/circle/schema_generated.h> #include <luci/IR/CircleNodes.h> +#include <luci/IR/ExecutionPlanTable.h> #include <vector> @@ -63,13 +64,23 @@ public: _op_table.at(node_id).emplace(source_id); } + void add_execution_plan_table(uint32_t node_id, + const std::vector<uint32_t> &execution_plan_inform) + { + _execution_plan_table[node_id] = execution_plan_inform; + } + public: const std::vector<uint8_t> encoded_source_table(void); const std::vector<uint8_t> encoded_op_table(void); + const std::vector<uint8_t> encoded_execution_plan_table(void); private: std::map<uint32_t, std::string> _source_table; std::map<uint32_t, std::set<uint32_t>> _op_table; + // _exec_plan_table stores for node with node_id order of execution, and memory offsets: + // first go execution order, then 
memory offsets for node output tensors. + luci::ExecutionPlanTable _execution_plan_table; }; } // namespace luci diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 4e200f6ae..1df569d11 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -2,11 +2,16 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_import SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_import ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_import PRIVATE src) target_include_directories(luci_import PUBLIC include) target_link_libraries(luci_import PUBLIC luci_lang) target_link_libraries(luci_import PUBLIC luci_profile) +target_link_libraries(luci_import PUBLIC luci_plan) target_link_libraries(luci_import PUBLIC mio_circle) target_link_libraries(luci_import PRIVATE luci_env) target_link_libraries(luci_import PRIVATE luci_log) diff --git a/compiler/luci/import/src/CircleImportMetadata.cpp b/compiler/luci/import/src/CircleImportMetadata.cpp index f68f3301a..42dcebdaa 100644 --- a/compiler/luci/import/src/CircleImportMetadata.cpp +++ b/compiler/luci/import/src/CircleImportMetadata.cpp @@ -134,6 +134,55 @@ decoded_op_table(const std::vector<uint8_t> &op_table_data) return node_source_ids_map; } +// 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format. 
+const luci::ExecutionPlanTable +decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data) +{ + luci::ExecutionPlanTable execution_plan_table; + uint32_t idx = 0; + + if (execution_plan_data.size() < 4) + throw std::runtime_error("Op table decode error : invalid entry number"); + + uint32_t entry_number = read_u32(execution_plan_data, idx); + idx += sizeof(uint32_t); + + while (idx < execution_plan_data.size()) + { + if (idx + 2 * sizeof(uint32_t) > execution_plan_data.size()) + throw std::runtime_error("Op table decode error : invalid entry item"); + + uint32_t id = read_u32(execution_plan_data, idx); + idx += sizeof(uint32_t); + + uint32_t size = read_u32(execution_plan_data, idx); + idx += sizeof(uint32_t); + + if (idx + sizeof(uint32_t) * size > execution_plan_data.size()) + throw std::runtime_error("Source table decode error : invalid entry data"); + + std::vector<uint32_t> execution_plan_vector; + for (uint32_t j = 0; j < size; ++j) + { + uint32_t execution_plan_inform = read_u32(execution_plan_data, idx); + idx += sizeof(uint32_t); + + execution_plan_vector.push_back(execution_plan_inform); + } + + if (execution_plan_table.insert({id, execution_plan_vector}).second == false) + throw std::runtime_error("Op table decode error : duplicated origin ID"); + } + + if (idx != execution_plan_data.size()) + throw std::runtime_error("Op table decode error : data size invalid"); + + if (execution_plan_table.size() != entry_number) + throw std::runtime_error("Op table decode error : entry number invalid"); + + return execution_plan_table; +} + } // namespace namespace luci @@ -153,6 +202,8 @@ CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader) _op_table = decoded_op_table(buffer); else if (meta.name.compare("ONE_source_table") == 0) _source_table = decoded_source_table(buffer); + else if (meta.name.compare("ONE_execution_plan_table") == 0) + _execution_plan_table = decoded_execution_plan(buffer); } } diff --git 
a/compiler/luci/import/src/CircleImportMetadata.h b/compiler/luci/import/src/CircleImportMetadata.h index 007985dcc..0e0240678 100644 --- a/compiler/luci/import/src/CircleImportMetadata.h +++ b/compiler/luci/import/src/CircleImportMetadata.h @@ -20,6 +20,7 @@ #include "luci/Import/CircleReader.h" #include <luci/Profile/CircleNodeOrigin.h> +#include <luci/IR/ExecutionPlanTable.h> #include <map> #include <set> @@ -47,10 +48,15 @@ public: const std::map<uint32_t, std::string> &source_table(void) const { return _source_table; } + const luci::ExecutionPlanTable &execution_plan_table(void) const { return _execution_plan_table; } + private: // Decoded metadata is stored std::map<uint32_t, std::string> _source_table; std::map<uint32_t, std::set<uint32_t>> _op_table; + // _execution_plan_table stores for node with node_id order of execution, + // and offsets output tensors + luci::ExecutionPlanTable _execution_plan_table; }; } // namespace luci diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp index 68baefab0..8eae5fcf4 100644 --- a/compiler/luci/import/src/Importer.cpp +++ b/compiler/luci/import/src/Importer.cpp @@ -28,6 +28,7 @@ #include <luci/IR/CircleNodes.h> #include <luci/Profile/CircleNodeID.h> #include <luci/Profile/CircleNodeOrigin.h> +#include <luci/Plan/CircleNodeExecutionPlan.h> #include <luci/Log.h> #include <luci/LogHelper.h> @@ -344,6 +345,25 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const module->source_table(table); } + // Add execution_plan annotations + if (circle_metadata->execution_plan_table().size() > 0) + { + auto execution_plan_table = circle_metadata->execution_plan_table(); + auto node_position = 0; + for (auto node : loco::postorder_traversal(loco::output_nodes(module->graph()))) + { + if (auto circle_node = dynamic_cast<luci::CircleNode *>(node)) + { + auto node_plan = execution_plan_table[node_position]; + luci::add_execution_plan( + circle_node, + 
luci::CircleNodeExecutionPlan( + node_plan[0], std::vector<uint32_t>(node_plan.begin() + 1, node_plan.end()))); + } + node_position++; + } + } + return module; } diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt index 669a866b1..433b7cd4e 100644 --- a/compiler/luci/lang/CMakeLists.txt +++ b/compiler/luci/lang/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_lang SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_lang ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_lang PRIVATE src) target_include_directories(luci_lang PUBLIC include) target_link_libraries(luci_lang PUBLIC loco) diff --git a/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h new file mode 100644 index 000000000..5c33c1123 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_EXECUTION_PLAN_TABLE_H__ +#define __LUCI_EXECUTION_PLAN_TABLE_H__ + +namespace luci +{ + +using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>; + +} // namespace luci + +#endif // __LUCI_EXECUTION_PLAN_TABLE_H__ diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt index 23bd00828..b64a0651e 100644 --- a/compiler/luci/log/CMakeLists.txt +++ b/compiler/luci/log/CMakeLists.txt @@ -1,7 +1,11 @@ # TODO Find how to test logging framework file(GLOB_RECURSE SOURCES "src/*.cpp") -add_library(luci_log SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_log ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_log PUBLIC include) target_link_libraries(luci_log PUBLIC hermes) target_link_libraries(luci_log PRIVATE hermes_std) diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt index cd2571ba1..4d801b046 100644 --- a/compiler/luci/logex/CMakeLists.txt +++ b/compiler/luci/logex/CMakeLists.txt @@ -1,7 +1,11 @@ # TODO Find how to test logging-ex utility file(GLOB_RECURSE SOURCES "src/*.cpp") -add_library(luci_logex SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_logex ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_logex PUBLIC include) target_link_libraries(luci_logex PUBLIC loco) target_link_libraries(luci_logex PUBLIC locop) diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt index 236b689c4..eacbe1ccc 100644 --- a/compiler/luci/partition/CMakeLists.txt +++ b/compiler/luci/partition/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_partition SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_partition 
${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_partition PRIVATE src) target_include_directories(luci_partition PUBLIC include) target_link_libraries(luci_partition PUBLIC luci_lang) diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt index fd06c6d52..2361bb4f5 100644 --- a/compiler/luci/pass/CMakeLists.txt +++ b/compiler/luci/pass/CMakeLists.txt @@ -1,8 +1,18 @@ +nnas_find_package(FlatBuffers EXACT 1.10 QUIET) +if(NOT FlatBuffers_FOUND) + message(STATUS "FlatBuffers NOT FOUND") + return() +endif(NOT FlatBuffers_FOUND) + file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_pass SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_pass ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_pass PRIVATE src) target_include_directories(luci_pass PUBLIC include) target_link_libraries(luci_pass PUBLIC loco) @@ -13,9 +23,11 @@ target_link_libraries(luci_pass PRIVATE luci_log) target_link_libraries(luci_pass PRIVATE luci_service) target_link_libraries(luci_pass PRIVATE luci_logex) target_link_libraries(luci_pass PRIVATE luci_profile) +target_link_libraries(luci_pass PRIVATE mio_tflite260_inc) target_link_libraries(luci_pass PRIVATE nncc_common) target_link_libraries(luci_pass PRIVATE pepper_csv2vec) target_link_libraries(luci_pass PRIVATE oops) +target_link_libraries(luci_pass PRIVATE flatbuffers-1.12) install(TARGETS luci_pass DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") @@ -31,4 +43,5 @@ target_include_directories(luci_pass_test PRIVATE src) target_link_libraries(luci_pass_test luci_pass) target_link_libraries(luci_pass_test luci_lang) target_link_libraries(luci_pass_test luci_testhelper) +target_link_libraries(luci_pass_test flatbuffers-1.12) #target_link_libraries(luci_pass_test oops) diff --git 
a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 3bcc7c5bb..917cacae9 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -34,6 +34,7 @@ public: { enum Algorithm { + FuseAddWithFullyConnected, FuseAddWithTConv, FuseBatchNormWithConv, FuseBatchNormWithDwConv, @@ -51,8 +52,10 @@ public: Requantize, FoldAddV2, FoldCast, + FoldDepthwiseConv2D, FoldDequantize, FoldSparseToDense, + ForceQuantParam, ForwardReshapeToUnaryOp, SparsifyTensorPass, FusePreActivationBatchNorm, @@ -64,7 +67,9 @@ public: ReplaceSubWithAdd, SubstitutePackToReshape, SubstitutePadV2ToPad, + SubstituteSplitVToSplit, SubstituteSqueezeToReshape, + ExpandBroadcastConst, ConvertNCHWToNHWC, RemoveUnnecessarySlice, RemoveUnnecessaryStridedSlice, @@ -82,9 +87,12 @@ public: enum AlgorithmParameters { // quantize - Quantize_input_dtype, - Quantize_output_dtype, + Quantize_input_model_dtype, + Quantize_output_model_dtype, Quantize_granularity, // layer-wise or channel-wise + Quantize_tensor_names, + Quantize_scales, + Quantize_zero_points, // sparsify Sparsify_tensor_name, @@ -96,6 +104,9 @@ public: // convert NCHW to NHWC NCHW_to_NHWC_input_shape, NCHW_to_NHWC_output_shape, + + Quantize_input_dtype = Quantize_input_model_dtype, // TODO Remove this + Quantize_output_dtype = Quantize_output_model_dtype, // TODO Remove this }; virtual ~Options() = default; @@ -104,6 +115,8 @@ public: virtual bool query(Algorithm) = 0; virtual void param(AlgorithmParameters, const std::string &) = 0; virtual const std::string param(AlgorithmParameters) const = 0; + virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0; + virtual std::vector<std::string> params(AlgorithmParameters) const = 0; }; public: diff --git a/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h new file mode 100644 index 
000000000..5ee26b472 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_EXPAND_BROADCAST_CONST_PASS_H__ +#define __LUCI_EXPAND_BROADCAST_CONST_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to remove broadcasts of Const nodes. + */ +struct ExpandBroadcastConstPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::ExpandBroadcastConstPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_EXPAND_BROADCAST_CONST_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h new file mode 100644 index 000000000..58e5b71a7 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__ +#define __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to fold DepthwiseConv2D with constant input and filter into a + * constant tensor + */ +struct FoldDepthwiseConv2DPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FoldDepthwiseConv2DPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h new file mode 100644 index 000000000..752ce1d31 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_FORCE_QUANT_PARAM_PASS_H__ +#define __LUCI_FORCE_QUANT_PARAM_PASS_H__ + +#include <loco.h> + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Pass to write quantparam (scale, zerop) to the specified tensors + */ +class ForceQuantParamPass : public logo::Pass +{ +public: + using TensorVector = std::vector<std::string>; + using ScaleVector = std::vector<float>; + using ZPVector = std::vector<int64_t>; + +public: + ForceQuantParamPass(TensorVector &tensors, ScaleVector &scales, ZPVector &zerops) + : _tensors{tensors}, _scales{scales}, _zerops{zerops} + { + // DO NOTHING + } + virtual const char *name(void) const { return "luci::ForceQuantParamPass"; } + +public: + bool run(loco::Graph *graph); + +private: + TensorVector _tensors; + ScaleVector _scales; + ZPVector _zerops; +}; + +} // namespace luci + +#endif //__LUCI_FORCE_QUANT_PARAM_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h new file mode 100644 index 000000000..a59b644e9 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__ +#define __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to fuse Add into FullyConnected + */ +struct FuseAddWithFullyConnectedPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FuseAddWithFullyConnectedPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__ diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h index 78e7323f9..68765ec5b 100644 --- a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h +++ b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h @@ -32,9 +32,10 @@ namespace luci class QuantizeDequantizeWeightsPass : public logo::Pass { public: - QuantizeDequantizeWeightsPass(loco::DataType input_dtype, loco::DataType output_dtype, + QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype, QuantizationGranularity granularity) - : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity} + : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{ + granularity} { // DO NOTHING } @@ -44,8 +45,8 @@ public: bool run(loco::Graph *graph); private: - loco::DataType _input_dtype; - loco::DataType _output_dtype; + loco::DataType _input_model_dtype; + loco::DataType _output_model_dtype; QuantizationGranularity _granularity; }; diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h index 9520910d5..d618a07b6 100644 --- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h +++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h @@ -32,9 +32,10 @@ namespace luci class QuantizeWithMinMaxPass : 
public logo::Pass { public: - QuantizeWithMinMaxPass(loco::DataType input_dtype, loco::DataType output_dtype, + QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype, QuantizationGranularity granularity) - : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity} + : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{ + granularity} { // DO NOTHING } @@ -44,8 +45,8 @@ public: bool run(loco::Graph *graph); private: - loco::DataType _input_dtype; - loco::DataType _output_dtype; + loco::DataType _input_model_dtype; + loco::DataType _output_model_dtype; QuantizationGranularity _granularity; }; diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h new file mode 100644 index 000000000..8c8900159 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__ +#define __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__ + +#include <logo/Pass.h> + +namespace luci +{ + +/** + * @brief Class to substitute certain SplitV to Split. 
+ */ +struct SubstituteSplitVToSplitPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::SubstituteSplitVToSplitPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 98c22a07a..5d0c92625 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -17,12 +17,16 @@ #include "luci/CircleOptimizer.h" #include "luci/Pass/ConvertNCHWToNHWCPass.h" +#include "luci/Pass/ExpandBroadcastConstPass.h" #include "luci/Pass/FoldAddV2Pass.h" #include "luci/Pass/FoldCastPass.h" +#include "luci/Pass/FoldDepthwiseConv2DPass.h" #include "luci/Pass/FoldDequantizePass.h" #include "luci/Pass/FoldSparseToDensePass.h" #include "luci/Pass/ForwardReshapeToUnaryOpPass.h" +#include "luci/Pass/ForceQuantParamPass.h" #include "luci/Pass/FuseActivationFunctionPass.h" +#include "luci/Pass/FuseAddWithFullyConnectedPass.h" #include "luci/Pass/FuseAddWithTConvPass.h" #include "luci/Pass/FuseBatchNormWithConvPass.h" #include "luci/Pass/FuseBatchNormWithDwConvPass.h" @@ -55,6 +59,7 @@ #include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h" #include "luci/Pass/SubstitutePackToReshapePass.h" #include "luci/Pass/SubstitutePadV2ToPadPass.h" +#include "luci/Pass/SubstituteSplitVToSplitPass.h" #include "luci/Pass/SubstituteSqueezeToReshapePass.h" #include "luci/Pass/SubstituteStridedSliceToReshapePass.h" #include "luci/Pass/SubstituteTransposeToReshapePass.h" @@ -86,17 +91,37 @@ namespace using namespace luci; +template <typename T> T lexical_cast(const std::string &str) +{ + std::istringstream ss; + ss.str(str); + T data; + ss >> data; + return data; +} + +template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv) +{ + std::vector<T> result; + std::transform(sv.begin(), sv.end(), std::back_inserter(result), + [](std::string str) 
-> T { return lexical_cast<T>(str); }); + return result; +} + class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options { public: void enable(Algorithm) final; void param(AlgorithmParameters, const std::string &) final; const std::string param(AlgorithmParameters) const final; + void params(AlgorithmParameters, std::vector<std::string> &) final; + std::vector<std::string> params(AlgorithmParameters) const final; bool query(Algorithm) final; private: std::vector<Algorithm> _algorithms; std::map<AlgorithmParameters, const std::string> _algorithm_params; + std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params; }; void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); } @@ -119,6 +144,24 @@ const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const } } +void OptimizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec) +{ + _multiple_params[param] = vec; +} + +std::vector<std::string> OptimizeOptionsImpl::params(AlgorithmParameters param) const +{ + auto param_vec = _multiple_params.find(param); + if (param_vec != _multiple_params.end()) + { + return param_vec->second; + } + else + { + return std::vector<std::string>(); + } +} + bool OptimizeOptionsImpl::query(Algorithm algo) { std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo); @@ -237,6 +280,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>()); } + if (_options->query(Options::Algorithm::FuseAddWithFullyConnected)) + { + phase.emplace_back(std::make_unique<FuseAddWithFullyConnectedPass>()); + } if (_options->query(Options::Algorithm::FuseAddWithTConv)) { phase.emplace_back(std::make_unique<FuseAddWithTConvPass>()); @@ -257,6 +304,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::FoldCastPass>()); } + if 
(_options->query(Options::Algorithm::FoldDepthwiseConv2D)) + { + phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>()); + } if (_options->query(Options::Algorithm::FoldDequantize)) { phase.emplace_back(std::make_unique<luci::FoldDequantizePass>()); @@ -281,6 +332,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>()); } + if (_options->query(Options::Algorithm::ExpandBroadcastConst)) + { + phase.emplace_back(std::make_unique<luci::ExpandBroadcastConstPass>()); + } if (_options->query(Options::Algorithm::RemoveFakeQuant)) { phase.emplace_back(std::make_unique<luci::RemoveFakeQuantPass>()); @@ -329,6 +384,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique<luci::SubstitutePadV2ToPadPass>()); } + if (_options->query(Options::Algorithm::SubstituteSplitVToSplit)) + { + phase.emplace_back(std::make_unique<luci::SubstituteSplitVToSplitPass>()); + } if (_options->query(Options::Algorithm::SubstituteSqueezeToReshape)) { phase.emplace_back(std::make_unique<luci::SubstituteSqueezeToReshapePass>()); @@ -363,28 +422,30 @@ void CircleOptimizer::quantize(loco::Graph *g) const // Fake quantization of weights if (_options->query(Options::Algorithm::QuantizeDequantizeWeights)) { - static const std::vector<std::string> fakeq_supported_input_dtype{"float32"}; - static const std::vector<std::string> fakeq_supported_output_dtype{"uint8", "int16"}; + static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"}; + static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"}; static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"}; - auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype); - auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype); + auto input_model_dtype = + 
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); + auto output_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); - if (!in_array(to_lower_case(input_dtype), fakeq_supported_input_dtype)) + if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype)) throw std::runtime_error("Unsupported input type. List of supported input type: " + - to_string(fakeq_supported_input_dtype)); + to_string(fakeq_supported_input_model_dtype)); - if (!in_array(to_lower_case(output_dtype), fakeq_supported_output_dtype)) + if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype)) throw std::runtime_error("Unsupported output type. List of supported output type: " + - to_string(fakeq_supported_output_dtype)); + to_string(fakeq_supported_output_model_dtype)); if (!in_array(to_lower_case(granularity), fakeq_supported_granularity)) throw std::runtime_error("Unsupported granularity. 
List of supported granularity: " + to_string(fakeq_supported_granularity)); if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && - str_to_dtype(output_dtype) != loco::DataType::U8) + str_to_dtype(output_model_dtype) != loco::DataType::U8) throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); // Clear existing quantparams before doing fake quantization @@ -395,39 +456,43 @@ void CircleOptimizer::quantize(loco::Graph *g) const circle_node->quantparam(nullptr); } - luci::QuantizeDequantizeWeightsPass fake_quantizer( - str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity)); + luci::QuantizeDequantizeWeightsPass fake_quantizer(str_to_dtype(input_model_dtype), + str_to_dtype(output_model_dtype), + str_to_granularity(granularity)); fake_quantizer.run(g); } // Actual quantization of weights, bias, and activation if (_options->query(Options::Algorithm::QuantizeWithMinMax)) { - static const std::vector<std::string> qwmm_supported_input_dtype{"float32"}; - static const std::vector<std::string> qwmm_supported_output_dtype{"uint8", "int16"}; + static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"}; + static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"}; static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"}; - auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype); - auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype); + auto input_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); + auto output_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity); - if (!in_array(to_lower_case(input_dtype), qwmm_supported_input_dtype)) + if (!in_array(to_lower_case(input_model_dtype), 
qwmm_supported_input_model_dtype)) throw std::runtime_error("Unsupported input type. List of supported input types: " + - to_string(qwmm_supported_input_dtype)); + to_string(qwmm_supported_input_model_dtype)); - if (!in_array(to_lower_case(output_dtype), qwmm_supported_output_dtype)) + if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype)) throw std::runtime_error("Unsupported output type. List of supported output types: " + - to_string(qwmm_supported_output_dtype)); + to_string(qwmm_supported_output_model_dtype)); if (!in_array(to_lower_case(granularity), qwmm_supported_granularity)) throw std::runtime_error("Unsupported granularity. List of supported granularity: " + to_string(qwmm_supported_granularity)); if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise && - str_to_dtype(output_dtype) != loco::DataType::U8) + str_to_dtype(output_model_dtype) != loco::DataType::U8) throw std::runtime_error("Layer-wise quantization only supports uint8 dtype."); - luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype), + luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_model_dtype), + str_to_dtype(output_model_dtype), str_to_granularity(granularity)); quantizer.run(g); @@ -446,7 +511,7 @@ void CircleOptimizer::quantize(loco::Graph *g) const phase_runner.run(phase); // Verify the type/granularity of the quantized model - luci::QuantizedModelVerifier verifier(str_to_dtype(output_dtype), + luci::QuantizedModelVerifier verifier(str_to_dtype(output_model_dtype), str_to_granularity(granularity)); verifier.verify(g); } @@ -454,24 +519,44 @@ void CircleOptimizer::quantize(loco::Graph *g) const // Requantize if (_options->query(Options::Algorithm::Requantize)) { - static const std::vector<std::string> rq_supported_input_dtype{"int8"}; - static const std::vector<std::string> rq_supported_output_dtype{"uint8"}; + static const std::vector<std::string> rq_supported_input_model_dtype{"int8"}; + 
static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"}; - auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype); - auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype); + auto input_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype); + auto output_model_dtype = + _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype); - if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype)) + if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype)) throw std::runtime_error("Unsupported input type. List of supported input types: " + - to_string(rq_supported_input_dtype)); + to_string(rq_supported_input_model_dtype)); - if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype)) + if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype)) throw std::runtime_error("Unsupported output type. 
List of supported output types: " + - to_string(rq_supported_output_dtype)); + to_string(rq_supported_output_model_dtype)); - luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype)); + luci::RequantizePass requantizer(str_to_dtype(input_model_dtype), + str_to_dtype(output_model_dtype)); requantizer.run(g); } + // Force to write quantparam to specified tensors + // NOTE Only per-tensor (not per-channel) qparam can be written + if (_options->query(Options::Algorithm::ForceQuantParam)) + { + ForceQuantParamPass::TensorVector tensors = + _options->params(Options::AlgorithmParameters::Quantize_tensor_names); + auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales); + auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points); + + // Cast scales/zero_points to proper types + ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales); + ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points); + + ForceQuantParamPass fq(tensors, scales, zero_points); + fq.run(g); + } + logo::Phase phase; // Do Shape/Type inference diff --git a/compiler/luci/pass/src/CircleOptimizer.test.cpp b/compiler/luci/pass/src/CircleOptimizer.test.cpp index 43d96feaf..a1b5c7f80 100644 --- a/compiler/luci/pass/src/CircleOptimizer.test.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.test.cpp @@ -33,6 +33,7 @@ TEST(CircleOptimizerTest, optimize_algorithms) // TODO add more if needed options->enable(Algorithms::FoldAddV2); options->enable(Algorithms::FoldCast); + options->enable(Algorithms::FoldDepthwiseConv2D); options->enable(Algorithms::FoldDequantize); options->enable(Algorithms::FoldSparseToDense); options->enable(Algorithms::FusePreActivationBatchNorm); @@ -45,6 +46,7 @@ TEST(CircleOptimizerTest, optimize_algorithms) options->enable(Algorithms::SubstituteStridedSliceToReshape); options->enable(Algorithms::SubstituteTransposeToReshape); 
options->enable(Algorithms::ConvertNCHWToNHWC); + options->enable(Algorithms::ExpandBroadcastConst); o.optimize(&g); @@ -78,8 +80,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_simple) auto options = o.options(); options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); options->param(AlgorithmParameters::Quantize_granularity, "layer"); o.quantize(&g); @@ -95,8 +97,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG) auto options = o.options(); options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); options->param(AlgorithmParameters::Quantize_granularity, "layer"); EXPECT_THROW(o.quantize(&g), std::runtime_error); @@ -110,8 +112,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG) auto options = o.options(); options->enable(Algorithms::QuantizeDequantizeWeights); - options->param(AlgorithmParameters::Quantize_input_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); options->param(AlgorithmParameters::Quantize_granularity, "layer"); EXPECT_THROW(o.quantize(&g), std::runtime_error); @@ -125,8 +127,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG) auto options = o.options(); options->enable(Algorithms::QuantizeDequantizeWeights); - 
options->param(AlgorithmParameters::Quantize_input_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); options->param(AlgorithmParameters::Quantize_granularity, "invalid"); EXPECT_THROW(o.quantize(&g), std::runtime_error); @@ -140,8 +142,8 @@ TEST(CircleOptimizerTest, quantize_minmax_simple) auto options = o.options(); options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); options->param(AlgorithmParameters::Quantize_granularity, "layer"); o.quantize(&g); @@ -157,8 +159,8 @@ TEST(CircleOptimizerTest, quantize_minmax_input_NEG) auto options = o.options(); options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); options->param(AlgorithmParameters::Quantize_granularity, "layer"); EXPECT_THROW(o.quantize(&g), std::runtime_error); @@ -172,8 +174,8 @@ TEST(CircleOptimizerTest, quantize_minmax_output_NEG) auto options = o.options(); options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); 
options->param(AlgorithmParameters::Quantize_granularity, "layer"); EXPECT_THROW(o.quantize(&g), std::runtime_error); @@ -187,8 +189,8 @@ TEST(CircleOptimizerTest, quantize_minmax_gran_NEG) auto options = o.options(); options->enable(Algorithms::QuantizeWithMinMax); - options->param(AlgorithmParameters::Quantize_input_dtype, "float32"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); options->param(AlgorithmParameters::Quantize_granularity, "invalid"); EXPECT_THROW(o.quantize(&g), std::runtime_error); @@ -202,8 +204,8 @@ TEST(CircleOptimizerTest, quantize_requant_simple) auto options = o.options(); options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_dtype, "int8"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); o.quantize(&g); @@ -218,8 +220,8 @@ TEST(CircleOptimizerTest, quantize_requant_input_NEG) auto options = o.options(); options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_dtype, "invalid"); - options->param(AlgorithmParameters::Quantize_output_dtype, "uint8"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8"); EXPECT_THROW(o.quantize(&g), std::runtime_error); } @@ -232,8 +234,8 @@ TEST(CircleOptimizerTest, quantize_requant_output_NEG) auto options = o.options(); options->enable(Algorithms::Requantize); - options->param(AlgorithmParameters::Quantize_input_dtype, "int8"); - options->param(AlgorithmParameters::Quantize_output_dtype, "invalid"); + options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8"); + 
options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid"); EXPECT_THROW(o.quantize(&g), std::runtime_error); } diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp index 95e23e1b8..270714049 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp +++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp @@ -28,6 +28,22 @@ namespace { +bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape) +{ + if (not node) + return false; + + if (shape.size() != node->rank()) + return false; + + for (uint32_t i = 0; i < shape.size(); i++) + { + if (not(node->dim(i) == shape[i])) + return false; + } + return true; +} + enum class DataFormat { NCHW, @@ -465,7 +481,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node, // // Find MUL with an NCHW pattern described below // - Input (non-constant) shape : [N, C, H, W] -// - Input (constant) shape : [1, C, 1, 1] or a scalar (1) +// - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1) // - Output shape : [N, C, H, W] bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node, luci::CircleConst *&multiplier) @@ -497,26 +513,22 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod if (const_rank != 4 && const_rank != 0 && const_rank != 1) return false; - if (const_rank == 4) - { - for (uint32_t i = 0; i < const_rank; i++) - { - if (i != 1 && multiplier->dim(i).value() != 1) - return false; - } - } - const auto input_cdim = pred_node->dim(1); const auto output_cdim = node->dim(1); if (const_rank == 4) { - const auto const_cdim = multiplier->dim(1); - // Check Input, Output, Const have the same channel size - if (const_cdim == input_cdim && input_cdim == output_cdim) - return true; - else - return false; + bool supported_shape = false; + + // Check multiplier is (1, C, 1, 1) + if (is_same_shape(multiplier, {1, node->dim(1), 1, 1})) + 
supported_shape = true; + + // Check multiplier is (N, C, H, W) + if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) + supported_shape = true; + + return supported_shape; } if (input_cdim == output_cdim) return true; @@ -527,7 +539,7 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod // We assume ADD with const input is NCHW if, // Input shape: (N, C, H, W) // Output shape: (N, C, H, W) -// 1. Const shape is (1, C, 1, 1) or a scalar (1) +// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1) // 2. Input, Output, Const have the same C. bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_node, luci::CircleConst *&beta) @@ -559,30 +571,22 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod if (const_rank != 4 && const_rank != 0 && const_rank != 1) return false; - if (const_rank == 4) - { - // Check the shape is (1, C, 1, 1) - for (uint32_t i = 0; i < const_rank; i++) - { - if (i == 1) - continue; - - if (beta->dim(i).value() != 1) - return false; - } - } - const auto input_cdim = pred_node->dim(1); const auto output_cdim = node->dim(1); if (const_rank == 4) { - const auto const_cdim = beta->dim(1); - // Check Input, Output, Const have the same channel size - if (const_cdim == input_cdim && input_cdim == output_cdim) - return true; - else - return false; + bool supported_shape = false; + + // Check beta is (1, C, 1, 1) + if (is_same_shape(beta, {1, node->dim(1), 1, 1})) + supported_shape = true; + + // Check beta is (N, C, H, W) + if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) + supported_shape = true; + + return supported_shape; } if (input_cdim == output_cdim) return true; @@ -593,7 +597,7 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod // We assume SUB with const input is NCHW if, // Input shape: (N, C, H, W) // Output shape: (N, C, H, W) -// 1. 
Const shape is (1, C, 1, 1) or a scalar (1) +// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1) // 2. Input, Output, Const have the same C. bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pred_node, const luci::CircleConst *subtract) @@ -609,30 +613,22 @@ bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pre if (const_rank != 4 && const_rank != 0 && const_rank != 1) return false; - if (const_rank == 4) - { - // Check the shape is (1, C, 1, 1) - for (uint32_t i = 0; i < const_rank; i++) - { - if (i == 1) - continue; - - if (subtract->dim(i).value() != 1) - return false; - } - } - const auto input_cdim = pred_node->dim(1); const auto output_cdim = node->dim(1); if (const_rank == 4) { - const auto const_cdim = subtract->dim(1); - // Check Input, Output, Const have the same channel size - if (const_cdim == input_cdim && input_cdim == output_cdim) - return true; - else - return false; + bool supported_shape = false; + + // Check subtract is (1, C, 1, 1) + if (is_same_shape(subtract, {1, node->dim(1), 1, 1})) + supported_shape = true; + + // Check subtract is (N, C, H, W) + if (is_same_shape(subtract, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)})) + supported_shape = true; + + return supported_shape; } if (input_cdim == output_cdim) return true; diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp index d844246f8..c9412fbb1 100644 --- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp +++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp @@ -130,6 +130,19 @@ protected: } public: + void update_const_shape_to_nchw(void) + { + uint32_t channel_size = 16; + beta->shape({1, channel_size, 4, 4}); + + beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4); + for (uint32_t i = 0; i < channel_size; i++) + { + beta->at<loco::DataType::FLOAT32>(i) = i; + } + } + +public: luci::CircleAdd *add = nullptr; luci::CircleConst 
*beta = nullptr; }; @@ -421,6 +434,19 @@ protected: } public: + void update_const_shape_to_nchw(void) + { + uint32_t channel_size = 16; + multiplier->shape({1, channel_size, 4, 4}); + + multiplier->size<loco::DataType::FLOAT32>(channel_size * 4 * 4); + for (uint32_t i = 0; i < channel_size; i++) + { + multiplier->at<loco::DataType::FLOAT32>(i) = i; + } + } + +public: luci::CircleMul *mul = nullptr; luci::CircleConst *multiplier = nullptr; }; @@ -696,6 +722,19 @@ protected: } public: + void update_const_shape_to_nchw(void) + { + uint32_t channel_size = 16; + beta->shape({1, channel_size, 4, 4}); + + beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4); + for (uint32_t i = 0; i < channel_size; i++) + { + beta->at<loco::DataType::FLOAT32>(i) = i; + } + } + +public: luci::CircleSub *sub = nullptr; luci::CircleConst *beta = nullptr; }; @@ -815,6 +854,30 @@ TEST(ConvertNCHWToNHWC, Add) check_pre_trans(g.output->from()); } +TEST(ConvertNCHWToNHWC, Add_NCHW_const) +{ + AddGraph g; + g.init(); + g.update_const_shape_to_nchw(); + + run_phase(&g.g, false, false); + + check_pre_trans(g.add->x()); + + auto add_succs = loco::succs(g.add); + EXPECT_EQ(1, add_succs.size()); + check_post_trans(*add_succs.begin()); + + uint32_t channel_size = 16; + auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y()); + EXPECT_NE(nullptr, new_beta); + EXPECT_EQ(4, new_beta->rank()); + EXPECT_EQ(1, new_beta->dim(0).value()); + EXPECT_EQ(4, new_beta->dim(1).value()); + EXPECT_EQ(4, new_beta->dim(2).value()); + EXPECT_EQ(channel_size, new_beta->dim(3).value()); +} + TEST(ConvertNCHWToNHWC, NHWC_Relu) { // Relu is already NHWC, so it should not be converted @@ -1123,6 +1186,30 @@ TEST(ConvertNCHWToNHWC, Mul) check_pre_trans(g.output->from()); } +TEST(ConvertNCHWToNHWC, Mul_NCHW_const) +{ + MulGraph g; + g.init(); + g.update_const_shape_to_nchw(); + + run_phase(&g.g, false, false); + + check_pre_trans(g.mul->x()); + + auto mul_succs = loco::succs(g.mul); + EXPECT_EQ(1, mul_succs.size()); + 
check_post_trans(*mul_succs.begin()); + + uint32_t channel_size = 16; + auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y()); + EXPECT_NE(nullptr, new_multiplier); + EXPECT_EQ(4, new_multiplier->rank()); + EXPECT_EQ(1, new_multiplier->dim(0).value()); + EXPECT_EQ(4, new_multiplier->dim(1).value()); + EXPECT_EQ(4, new_multiplier->dim(2).value()); + EXPECT_EQ(channel_size, new_multiplier->dim(3).value()); +} + TEST(ConvertNCHWToNHWC, MulScalar) { MulScalarGraph g; @@ -1432,6 +1519,30 @@ TEST(ConvertNCHWToNHWC, Sub) check_pre_trans(g.output->from()); } +TEST(ConvertNCHWToNHWC, Sub_NCHW_const) +{ + SubGraph g; + g.init(); + g.update_const_shape_to_nchw(); + + run_phase(&g.g, false, false); + + check_pre_trans(g.sub->x()); + + auto sub_succs = loco::succs(g.sub); + EXPECT_EQ(1, sub_succs.size()); + check_post_trans(*sub_succs.begin()); + + uint32_t channel_size = 16; + auto new_beta = dynamic_cast<luci::CircleConst *>(g.sub->y()); + EXPECT_NE(nullptr, new_beta); + EXPECT_EQ(4, new_beta->rank()); + EXPECT_EQ(1, new_beta->dim(0).value()); + EXPECT_EQ(4, new_beta->dim(1).value()); + EXPECT_EQ(4, new_beta->dim(2).value()); + EXPECT_EQ(channel_size, new_beta->dim(3).value()); +} + TEST(ConvertNCHWToNHWC, SubScalar) { SubScalarGraph g; diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp new file mode 100644 index 000000000..25fb9f171 --- /dev/null +++ b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ExpandBroadcastConstPass.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Log.h> + +#include <type_traits> + +namespace +{ + +luci::CircleConst *create_expanded_constant(luci::CircleConst *node, luci::CircleNode *successor) +{ + LOGGER(l); + + if (successor->rank() != node->rank()) + return nullptr; + + std::vector<uint32_t> broadcast_dims; + for (uint32_t dim = 0; dim < node->rank(); ++dim) + { + if (node->dim(dim) == successor->dim(dim)) + continue; + + if (node->dim(dim) == 1) + broadcast_dims.push_back(dim); + } + + if (broadcast_dims.size() != 1 || broadcast_dims.back() != node->rank() - 1) + { + WARN(l) << "NYI: Only depth broadcast removal is supported"; + return nullptr; + } + + auto constant = node->graph()->nodes()->create<luci::CircleConst>(); + constant->name(node->name()); + constant->dtype(node->dtype()); + constant->rank(node->rank()); + constant->shape_status(luci::ShapeStatus::VALID); + + uint32_t node_size = node->size<loco::DataType::FLOAT32>(); + uint32_t constant_size = 1; + for (uint32_t i = 0; i < successor->rank(); ++i) + { + constant->dim(i).set(successor->dim(i).value()); + constant_size *= constant->dim(i).value(); + } + constant->size<loco::DataType::FLOAT32>(constant_size); + + auto const node_data = &node->at<loco::DataType::FLOAT32>(0); + auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0); + + auto const successor_depth = successor->dim(successor->rank() - 1).value(); + for (uint32_t d = 0; d < successor_depth; ++d) + std::copy(node_data, node_data + node_size, 
constant_data + d * node_size); + + return constant; +} + +template <typename N> bool expand_node_input(luci::CircleConst *node, luci::CircleNode *successor) +{ + static_assert(std::is_base_of<luci::CircleNode, N>::value, + "Successor node should have CircleNode base"); + + auto const successor_node = loco::must_cast<N *>(successor); + auto const successor_x = loco::must_cast<luci::CircleNode *>(successor_node->x()); + auto const successor_y = loco::must_cast<luci::CircleNode *>(successor_node->y()); + + luci::CircleConst *expanded_const; + + if (node == successor_x) + { + expanded_const = create_expanded_constant(node, successor_y); + + if (expanded_const == nullptr) + return false; + + successor_node->x(expanded_const); + } + else if (node == successor_y) + { + expanded_const = create_expanded_constant(node, successor_x); + + if (expanded_const == nullptr) + return false; + + successor_node->y(expanded_const); + } + + return true; +} + +/** + * Expand constants following broadcasting rules for binary input nodes (Add, Mul, etc.) 
+ * + * BEFORE + * + * [CircleInput] [CircleConst (H x W x 1)] + * | | + * [CircleAdd] + * + * AFTER + * + * [CircleInput] [CircleConst (H x W x D)] + * | | + * [CircleAdd] + */ +bool expand_broadcast_const(luci::CircleConst *node) +{ + if (node->dtype() != loco::DataType::FLOAT32) + return false; // Unsupported data type + + bool changed = false; + + for (auto successor : loco::succs(node)) + { + auto const circle_successor = loco::must_cast<luci::CircleNode *>(successor); + switch (circle_successor->opcode()) + { + case luci::CircleOpcode::ADD: + if (expand_node_input<luci::CircleAdd>(node, circle_successor)) + changed = true; + break; + case luci::CircleOpcode::MUL: + if (expand_node_input<luci::CircleMul>(node, circle_successor)) + changed = true; + break; + case luci::CircleOpcode::DIV: + if (expand_node_input<luci::CircleDiv>(node, circle_successor)) + changed = true; + break; + default: + break; // Unsupported successor node + } + } + + return changed; +} + +} // namespace + +namespace luci +{ + +/** + * Broadcast expanding for Const nodes + **/ +bool ExpandBroadcastConstPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto const_node = dynamic_cast<luci::CircleConst *>(node); + if (const_node == nullptr) + continue; + + if (expand_broadcast_const(const_node)) + changed = true; + } + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp new file mode 100644 index 000000000..0734e0778 --- /dev/null +++ b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ExpandBroadcastConstPass.h" +#include "PassTestGraphs.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +class ExpandBroadcastConstTest : public ::testing::Test +{ +public: + ExpandBroadcastConstTest() + { + _x = _g.nodes()->create<luci::CircleInput>(); + _y = _g.nodes()->create<luci::CircleConst>(); + _add = _g.nodes()->create<luci::CircleAdd>(); + _output = _g.nodes()->create<luci::CircleOutput>(); + + auto graph_input = _g.inputs()->create(); + graph_input->dtype(loco::DataType::FLOAT32); + graph_input->shape({1, H, W, D}); + _x->index(graph_input->index()); + _x->dtype(graph_input->dtype()); + _x->shape({1, H, W, D}); + + auto graph_output = _g.outputs()->create(); + graph_output->dtype(loco::DataType::FLOAT32); + graph_output->shape({1, H, W, D}); + _output->index(graph_output->index()); + _output->dtype(graph_output->dtype()); + _output->shape({1, H, W, D}); + + _y->dtype(loco::DataType::FLOAT32); + _y->shape({1, H, W, 1}); + _y->size<loco::DataType::FLOAT32>(16); + + _add->dtype(loco::DataType::FLOAT32); + _add->fusedActivationFunction(luci::FusedActFunc::NONE); + _add->x(_x); + _add->y(_y); + _add->shape({1, H, W, D}); + + _output->from(_add); + + _x->name("input"); + _output->name("output"); + } + +protected: + uint32_t const H = 4; + uint32_t const W = 4; + uint32_t const D = 3; + +protected: + loco::Graph _g; + luci::CircleAdd *_add = nullptr; + luci::CircleInput *_x = nullptr; + luci::CircleConst *_y = nullptr; + luci::CircleOutput *_output = nullptr; +}; + +} // namespace + 
+TEST_F(ExpandBroadcastConstTest, name) +{ + luci::ExpandBroadcastConstPass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST_F(ExpandBroadcastConstTest, remove_broadcast) +{ + for (uint32_t i = 0; i < H * W; ++i) + _y->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i); + + luci::ExpandBroadcastConstPass pass; + ASSERT_TRUE(pass.run(&_g)); + + auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y()); + ASSERT_NE(broadcasted_const, nullptr); + + EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32); + EXPECT_EQ(broadcasted_const->dim(1).value(), H); + EXPECT_EQ(broadcasted_const->dim(2).value(), W); + EXPECT_EQ(broadcasted_const->dim(3).value(), D); + EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D); + + for (uint32_t i = 0; i < H * W; ++i) + { + for (uint32_t d = 0; d < D; ++d) + { + EXPECT_NEAR(broadcasted_const->at<loco::DataType::FLOAT32>(i + H * W * d), + static_cast<float>(i), std::numeric_limits<float>::min()); + } + } +} + +TEST_F(ExpandBroadcastConstTest, remove_broadcast_multiple_successors) +{ + auto const circle_sqrt = _g.nodes()->create<luci::CircleSqrt>(); + circle_sqrt->dtype(loco::DataType::FLOAT32); + circle_sqrt->shape({1, H, W, 1}); + circle_sqrt->x(_y); + + luci::ExpandBroadcastConstPass pass; + ASSERT_TRUE(pass.run(&_g)); + + auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y()); + auto original_const = dynamic_cast<luci::CircleConst *>(circle_sqrt->x()); + + ASSERT_NE(broadcasted_const, nullptr); + EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32); + EXPECT_EQ(broadcasted_const->dim(3).value(), D); + EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D); + + // Check if another successor's node was left intact + ASSERT_NE(original_const, nullptr); + EXPECT_EQ(original_const->dtype(), loco::DataType::FLOAT32); + EXPECT_EQ(original_const->dim(3).value(), 1); + EXPECT_EQ(original_const->size<loco::DataType::FLOAT32>(), H * 
W * 1); +} + +TEST_F(ExpandBroadcastConstTest, broadcast_impossible_NEG) +{ + _y->shape({1, H, W, 2}); + _y->size<loco::DataType::FLOAT32>(H * W * (D - 1)); + + luci::ExpandBroadcastConstPass pass; + ASSERT_FALSE(pass.run(&_g)); +} diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp new file mode 100644 index 000000000..6e423e3d9 --- /dev/null +++ b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FoldDepthwiseConv2DPass.h" + +#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h> + +#include <luci/IR/CircleNodes.h> +#include <luci/IR/AttrFusedActFunc.h> + +#include <luci/Log.h> + +namespace +{ + +// TODO Share activation mix/max and compute_input/output code with luci-interpreter + +bool compute_output(uint32_t *output_size, luci::Padding padding, int32_t image_size, + int32_t filter_size, int32_t stride, int32_t dilation_rate) +{ + auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1; + switch (padding) + { + case luci::Padding::SAME: + *output_size = (image_size + stride - 1) / stride; + return true; + + case luci::Padding::VALID: + *output_size = (image_size + stride - effective_filter_size) / stride; + return true; + + default: + { + LOGGER(l); + WARN(l) << "Unsupported padding: " << uint32_t(padding); + return false; + } + } +} + +uint32_t compute_padding(int32_t stride, int32_t dilation_rate, int32_t in_size, + int32_t filter_size, int32_t out_size) +{ + auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1; + auto const padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2; + return padding > 0 ? 
padding : 0; +} + +bool set_kernel_parameters(tflite::DepthwiseParams *params, luci::CircleDepthwiseConv2D *node, + uint32_t padding_height, uint32_t padding_width) +{ + switch (node->fusedActivationFunction()) + { + case luci::FusedActFunc::NONE: + case luci::FusedActFunc::TANH: + params->float_activation_min = std::numeric_limits<float>::lowest(); + params->float_activation_max = std::numeric_limits<float>::max(); + break; + case luci::FusedActFunc::RELU: + params->float_activation_min = 0; + params->float_activation_max = std::numeric_limits<float>::max(); + break; + case luci::FusedActFunc::RELU_N1_TO_1: + params->float_activation_min = -1; + params->float_activation_max = 1; + break; + case luci::FusedActFunc::RELU6: + params->float_activation_min = 0; + params->float_activation_max = 6; + break; + default: + { + LOGGER(l); + WARN(l) << "Unsupported activation: " << uint32_t(node->fusedActivationFunction()); + return false; + } + } + + params->stride_height = node->stride()->h(); + params->stride_width = node->stride()->w(); + params->dilation_height_factor = node->dilation()->h(); + params->dilation_width_factor = node->dilation()->w(); + params->depth_multiplier = node->depthMultiplier(); + + params->padding_values.height = padding_height; + params->padding_values.width = padding_width; + + return true; +} + +/** + * Fold DepthwiseConv2D with constant input and filter into a constant tensor + * + * BEFORE + * + * [CircleConst] [CircleConst] + * | | + * [CircleDepthwiseConv2D] + * + * AFTER + * + * [CircleConst] + */ +bool fold_depthwise_conv_2d(luci::CircleDepthwiseConv2D *node) +{ + LOGGER(l); + + auto const input = dynamic_cast<luci::CircleConst *>(node->input()); + + if (input == nullptr) + return false; // Constant input is required for folding + + auto const filter = dynamic_cast<luci::CircleConst *>(node->filter()); + + if (filter == nullptr) + return false; // Constant filter is required for folding + + if (filter->dim(0).value() != 1) + return false; 
// Unsupported batch size + + auto const bias = dynamic_cast<luci::CircleConst *>(node->bias()); + + if (bias == nullptr) + return false; // Constant bias is required for folding + + auto const input_batches = input->dim(0).value(); + auto const input_height = input->dim(1).value(); + auto const input_width = input->dim(2).value(); + auto const input_depth = input->dim(3).value(); + + auto const filter_height = filter->dim(1).value(); + auto const filter_width = filter->dim(2).value(); + auto const filter_channels_out = filter->dim(3).value(); + + if (filter_channels_out % input_depth != 0) + return false; // Wrong input/output depth ratio + + if (node->depthMultiplier() != static_cast<int32_t>(filter_channels_out / input_depth)) + return false; // Wrong depth multiplier value + + if (bias->rank() != 1 || bias->dim(0).value() != filter_channels_out) + return false; // Unsupported bias value + + uint32_t output_height = 0; + uint32_t output_width = 0; + + if (!compute_output(&output_height, node->padding(), input_height, filter_height, + node->stride()->h(), node->dilation()->h())) + return false; // Unsupported output parameters + + if (!compute_output(&output_width, node->padding(), input_width, filter_width, + node->stride()->w(), node->dilation()->w())) + return false; // Unsupported output parameters + + auto const padding_height = compute_padding(node->stride()->h(), node->dilation()->h(), + input_height, filter_height, output_height); + auto const padding_width = compute_padding(node->stride()->w(), node->dilation()->w(), + input_width, filter_width, output_width); + + tflite::DepthwiseParams params{}; + + if (!set_kernel_parameters(¶ms, node, padding_height, padding_width)) + return false; // Unsupported kernel parameter values + + auto constant = node->graph()->nodes()->create<luci::CircleConst>(); + constant->name(node->name()); + constant->dtype(node->dtype()); + constant->rank(node->rank()); + constant->shape_status(luci::ShapeStatus::VALID); + for 
(uint32_t i = 0; i < node->rank(); ++i) + constant->dim(i).set(node->dim(i).value()); + + constant->size<loco::DataType::FLOAT32>(input_batches * output_height * output_width * + filter_channels_out); + + auto const input_data = &input->at<loco::DataType::FLOAT32>(0); + auto const filter_data = &filter->at<loco::DataType::FLOAT32>(0); + auto const bias_data = &bias->at<loco::DataType::FLOAT32>(0); + auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0); + + auto tensor_shape = [](luci::CircleNode *node) { + tflite::RuntimeShape runtime_shape(node->rank()); + for (uint32_t i = 0; i < node->rank(); ++i) + runtime_shape.SetDim(i, node->dim(i).value()); + return runtime_shape; + }; + + tflite::reference_ops::DepthwiseConv(params, tensor_shape(input), input_data, + tensor_shape(filter), filter_data, tensor_shape(bias), + bias_data, tensor_shape(constant), constant_data); + + loco::replace(node).with(constant); + + return true; +} + +} // namespace + +namespace luci +{ + +/** + * Constant Folding for DepthwiseConv2D Op + **/ +bool FoldDepthwiseConv2DPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto depthwise_conv2d = dynamic_cast<CircleDepthwiseConv2D *>(node); + + if (depthwise_conv2d == nullptr) + continue; + + switch (depthwise_conv2d->dtype()) + { + case loco::DataType::FLOAT32: + changed = fold_depthwise_conv_2d(depthwise_conv2d); + break; + default: + break; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp new file mode 100644 index 000000000..b1ef56833 --- /dev/null +++ b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/FoldDepthwiseConv2DPass.h" +#include "PassTestGraphs.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +/** + * Graph has an DepthwiseConv2D Op with constant inputs + * + * BEFORE + * + * [CircleConst] [CircleConst] + * | | + * [CircleDepthwiseConv2D] + * + * AFTER + * + * [CircleConst] + */ +class FoldDepthwiseConv2DTest : public luci::ConstantFoldingTestGraph, public ::testing::Test +{ +public: + FoldDepthwiseConv2DTest() : luci::ConstantFoldingTestGraph({1, 4, 4, 1}, loco::DataType::FLOAT32) + { + _dconv = _g.nodes()->create<luci::CircleDepthwiseConv2D>(); + _dconv_input = _g.nodes()->create<luci::CircleConst>(); + _dconv_filter = _g.nodes()->create<luci::CircleConst>(); + _dconv_bias = _g.nodes()->create<luci::CircleConst>(); + + _dconv->dtype(loco::DataType::FLOAT32); + _dconv->padding(luci::Padding::VALID); + _dconv->fusedActivationFunction(luci::FusedActFunc::NONE); + _dconv->input(_dconv_input); + _dconv->filter(_dconv_filter); + _dconv->bias(_dconv_bias); + _dconv->shape({1, 4, 4, 1}); + _dconv->stride()->h(1); + _dconv->stride()->w(1); + _dconv->depthMultiplier(1); + + _dconv_input->dtype(loco::DataType::FLOAT32); + _dconv_input->shape({1, 4, 4, 1}); + _dconv_input->size<loco::DataType::FLOAT32>(16); + + _dconv_filter->dtype(loco::DataType::FLOAT32); + _dconv_filter->shape({1, 1, 1, 1}); + 
_dconv_filter->size<loco::DataType::FLOAT32>(1); + + _dconv_bias->dtype(loco::DataType::FLOAT32); + _dconv_bias->shape({1}); + _dconv_bias->size<loco::DataType::FLOAT32>(1); + + _output->from(_dconv); + } + +protected: + void init() final {} + +protected: + loco::Node *createFoldedPattern() final { return nullptr; } + +protected: + luci::CircleConst *getFoldedPattern() final + { + return loco::must_cast<luci::CircleConst *>(_output->from()); + } + +protected: + luci::CircleDepthwiseConv2D *_dconv = nullptr; + luci::CircleConst *_dconv_input = nullptr; + luci::CircleConst *_dconv_filter = nullptr; + luci::CircleConst *_dconv_bias = nullptr; +}; + +} // namespace + +TEST(FoldDepthwiseConv2DPass, name) +{ + luci::FoldDepthwiseConv2DPass pass; + auto const name = pass.name(); + ASSERT_NE(nullptr, name); +} + +TEST_F(FoldDepthwiseConv2DTest, fold_depthwise_conv2d) +{ + for (uint32_t i = 0; i < 16; ++i) + _dconv_input->at<loco::DataType::FLOAT32>(i) = 0.5; + _dconv_filter->at<loco::DataType::FLOAT32>(0) = 0.5; + + luci::FoldDepthwiseConv2DPass pass; + ASSERT_TRUE(pass.run(&_g)); + + auto folded_const = getFoldedPattern(); + EXPECT_EQ(folded_const->dtype(), loco::DataType::FLOAT32); + EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(0), 0.25, + std::numeric_limits<float>::min()); + EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(15), 0.25, + std::numeric_limits<float>::min()); +} + +TEST_F(FoldDepthwiseConv2DTest, fold_non_constant_NEG) +{ + _dconv->input(_input); + + luci::FoldDepthwiseConv2DPass pass; + ASSERT_FALSE(pass.run(&_g)); +} diff --git a/compiler/luci/pass/src/ForceQuantParamPass.cpp b/compiler/luci/pass/src/ForceQuantParamPass.cpp new file mode 100644 index 000000000..32d482fc1 --- /dev/null +++ b/compiler/luci/pass/src/ForceQuantParamPass.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ForceQuantParamPass.h" +#include "luci/Profile/CircleNodeID.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Log.h> + +namespace luci +{ + +namespace +{ + +void set_qparam(luci::CircleNode *node, float scale, int64_t zp) +{ + assert(node); // FIX_CALLER_UNLESS + + auto quantparam = std::make_unique<CircleQuantParam>(); + quantparam->scale.push_back(scale); + quantparam->zerop.push_back(zp); + + node->quantparam(std::move(quantparam)); +} + +} // namespace + +bool ForceQuantParamPass::run(loco::Graph *g) +{ + LOGGER(l); + INFO(l) << "ForceQuantParamPass Start" << std::endl; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto const cnode = loco::must_cast<CircleNode *>(node); + auto const name = cnode->name(); + auto target = std::find(_tensors.begin(), _tensors.end(), name); + if (target == _tensors.end()) + continue; + + auto index = target - _tensors.begin(); + auto scale = _scales[index]; + auto zp = _zerops[index]; + set_qparam(cnode, scale, zp); + + _tensors.erase(_tensors.begin() + index); + _scales.erase(_scales.begin() + index); + _zerops.erase(_zerops.begin() + index); + } + + if (_tensors.size() > 0) + { + std::string msg; + for (auto const &t : _tensors) + msg += "Tensor does not exist: " + t + ".\n"; + msg += "Please check tensor name.\n"; + throw std::runtime_error(msg); + } + + INFO(l) << "ForceQuantParamPass End" << 
std::endl; + return false; // one time run +} + +} // namespace luci diff --git a/compiler/luci/pass/src/ForceQuantParamPass.test.cpp b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp new file mode 100644 index 000000000..a9da7c25e --- /dev/null +++ b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/ForceQuantParamPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <gtest/gtest.h> + +namespace +{ + +using TensorVector = luci::ForceQuantParamPass::TensorVector; +using ScaleVector = luci::ForceQuantParamPass::ScaleVector; +using ZPVector = luci::ForceQuantParamPass::ZPVector; + +std::unique_ptr<luci::CircleQuantParam> make_qparam(float scale, int64_t zp) +{ + auto qparam = std::make_unique<luci::CircleQuantParam>(); + qparam->scale.push_back(scale); + qparam->zerop.push_back(zp); + + return std::move(qparam); +} + +bool check_per_tensor_qparam(luci::CircleNode *node, float scale, int64_t zp) +{ + assert(node); // FIX_CALLER_UNLESS + + auto qparam = node->quantparam(); + if (qparam->scale.size() != 1) + return false; + + if (qparam->scale[0] != scale) + return false; + + if (qparam->zerop.size() != 1) + return false; + + if (qparam->zerop[0] != zp) + return false; + + return true; +} + +/** + * Graph with a single input and a single output. 
+ * + * [Input] + * | + * (graph body) -> implemented by insertGraphBody() + * | + * [Output] + * + */ +class SISOGraph +{ +public: + SISOGraph() = default; + +public: + void init() + { + input = g.nodes()->create<luci::CircleInput>(); + output = g.nodes()->create<luci::CircleOutput>(); + input->name("input"); + output->name("output"); + + auto graph_input = g.inputs()->create(); + input->index(graph_input->index()); + auto graph_output = g.outputs()->create(); + output->index(graph_output->index()); + + graph_input->dtype(loco::DataType::U8); + input->dtype(loco::DataType::U8); + output->dtype(loco::DataType::U8); + graph_output->dtype(loco::DataType::U8); + + input->quantparam(make_qparam(0.1, 11)); + output->quantparam(make_qparam(0.2, 12)); + + uint32_t channel_size = 16; + graph_input->shape({1, channel_size, 4, 4}); + input->shape({1, channel_size, 4, 4}); + output->shape({1, channel_size, 4, 4}); + graph_output->shape({1, channel_size, 4, 4}); + + auto graph_body = insertGraphBody(input); + output->from(graph_body); + } + + virtual ~SISOGraph() = default; + +protected: + virtual loco::Node *insertGraphBody(loco::Node *input) = 0; + +public: + loco::Graph g; + luci::CircleInput *input = nullptr; + luci::CircleOutput *output = nullptr; +}; + +class AddGraph final : public SISOGraph +{ +protected: + loco::Node *insertGraphBody(loco::Node *input) override + { + add = g.nodes()->create<luci::CircleAdd>(); + beta = g.nodes()->create<luci::CircleConst>(); + + add->dtype(loco::DataType::U8); + beta->dtype(loco::DataType::U8); + add->quantparam(make_qparam(0.1, 11)); + beta->quantparam(make_qparam(0.2, 12)); + + uint32_t channel_size = 16; + add->shape({1, 4, 4, channel_size}); + beta->shape({1, 1, 1, channel_size}); + + beta->size<loco::DataType::U8>(channel_size); + for (uint32_t i = 0; i < channel_size; i++) + { + beta->at<loco::DataType::U8>(i) = i; + } + + add->x(input); + add->y(beta); + + add->name("add"); + beta->name("beta"); + + return add; + } + +public: + 
luci::CircleAdd *add = nullptr; + luci::CircleConst *beta = nullptr; +}; + +} // namespace + +TEST(ForceQuantParamPassTest, simple) +{ + TensorVector tensors{"input", "add"}; + ScaleVector scales{2.0, 3.0}; + ZPVector zerops{4, 8}; + + luci::ForceQuantParamPass pass(tensors, scales, zerops); + + AddGraph g; + g.init(); + + pass.run(&g.g); + + EXPECT_TRUE(check_per_tensor_qparam(g.input, 2.0, 4)); + EXPECT_TRUE(check_per_tensor_qparam(g.add, 3.0, 8)); +} + +TEST(ForceQuantParamPassTest, name_mismatch_NEG) +{ + TensorVector tensors{"no_exist"}; + ScaleVector scales{2.0}; + ZPVector zerops{4}; + + luci::ForceQuantParamPass pass(tensors, scales, zerops); + + AddGraph g; + g.init(); + + EXPECT_THROW(pass.run(&g.g), std::runtime_error); +} diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp new file mode 100644 index 000000000..97a962cb6 --- /dev/null +++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FuseAddWithFullyConnectedPass.h" + +#include <luci/IR/CircleNodes.h> +#include <luci/Service/Nodes/CircleConst.h> +#include <luci/Profile/CircleNodeOrigin.h> + +namespace +{ +/** + * Fuse Add to FullyConnected if the added value is a channel(last dimension)-wise constant + * + * BEFORE + * | + * [CircleFullyConnected] + * | + * [CircleAdd] + * | + * + * AFTER + * | + * [CircleFullyConnected] [CircleAdd] (dead) + * | + * + */ +bool fuse_add_with_fc(luci::CircleFullyConnected *fc) +{ + if (not fc) + return false; + + if (fc->dtype() != loco::DataType::FLOAT32) + return false; + + if (fc->fusedActivationFunction() != luci::FusedActFunc::NONE) + return false; + + auto weights = dynamic_cast<luci::CircleConst *>(fc->weights()); + if (not weights) + return false; + + // Get add node + auto fc_output = loco::succs(fc); + if (fc_output.size() != 1) + return false; + + auto add = dynamic_cast<luci::CircleAdd *>(*fc_output.begin()); + if (not add) + return false; + if (add->dtype() != loco::DataType::FLOAT32) + return false; + + // Get addition + auto addition = add->x() == fc ? 
dynamic_cast<luci::CircleConst *>(add->y()) + : dynamic_cast<luci::CircleConst *>(add->x()); + + // Non-const addition + if (not addition) + return false; + + auto rank = addition->rank(); + // TODO Support scalar addition + if (rank == 0) + return false; + + for (uint32_t i = 0; i < rank - 1; i++) + { + if (addition->dim(i).value() != 1) + return false; + } + // Check the last dimesion of addition is the same with the number of neurons of FC + if (not(addition->dim(rank - 1) == weights->dim(0))) + return false; + + auto fused_bias = luci::clone(addition); + + // Add existing bias values + if (auto const_bias = dynamic_cast<luci::CircleConst *>(fc->bias())) + { + assert(const_bias->dtype() == loco::DataType::FLOAT32); + + auto bias_size = fused_bias->size<loco::DataType::FLOAT32>(); + assert(bias_size == const_bias->size<loco::DataType::FLOAT32>()); + for (uint32_t i = 0; i < bias_size; i++) + fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i); + } + + fc->bias(fused_bias); + fc->fusedActivationFunction(add->fusedActivationFunction()); + + // set origin + luci::add_origin(fc, luci::get_origin(add)); + + replace(add).with(fc); + + return true; +} + +} // namespace + +namespace luci +{ + +bool FuseAddWithFullyConnectedPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto fc = dynamic_cast<luci::CircleFullyConnected *>(node); + if (not fc) + continue; + + if (fuse_add_with_fc(fc)) + changed = true; + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp new file mode 100644 index 000000000..4cc2eb599 --- /dev/null +++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/FuseAddWithFullyConnectedPass.h" + +#include <luci/IR/CircleNodes.h> + +#include <luci/test/TestIOGraph.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template <typename T> +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector<uint32_t> &shape, + const std::vector<T> &values) +{ + auto node = g->nodes()->create<luci::CircleConst>(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size<DT>(size); \ + for (uint32_t i = 0; i < values.size(); ++i) \ + node->at<DT>(i) = values[i]; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} + +/** + * Simple graph for test + * + * BEFORE + * + * [FC] + * | + * [Add w/ Relu] + * + * AFTER + 
* + * [FC w/ Relu] (bias updated) + * + */ +class FCAddGraphlet +{ +public: + FCAddGraphlet() = default; + + void init(loco::Graph *g) + { + std::vector<float> weights_val(16 * 4); + _fc_f = create_const_node(g, loco::DataType::FLOAT32, {16, 4}, weights_val); + + std::vector<float> bias_val(16); + _fc_b = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, bias_val); + + _fc = g->nodes()->create<luci::CircleFullyConnected>(); + _fc->weights(_fc_f); + _fc->bias(_fc_b); + _fc->fusedActivationFunction(luci::FusedActFunc::NONE); + _fc->dtype(loco::DataType::FLOAT32); + _fc->shape({1, 16}); + _fc->name("fc"); + + std::vector<float> addition_val; + for (uint32_t i = 0; i < 16; i++) + addition_val.push_back(static_cast<float>(i)); + _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val); + + _add = g->nodes()->create<luci::CircleAdd>(); + _add->x(_fc); + _add->y(_add_c); + _add->fusedActivationFunction(luci::FusedActFunc::RELU); + _add->dtype(loco::DataType::FLOAT32); + _add->shape({1, 16}); + _add->name("add"); + } + +public: + luci::CircleFullyConnected *fc() { return _fc; } + +protected: + luci::CircleFullyConnected *_fc = nullptr; + luci::CircleAdd *_add = nullptr; + luci::CircleConst *_fc_f = nullptr; + luci::CircleConst *_fc_b = nullptr; + luci::CircleConst *_add_c = nullptr; +}; + +class FuseAddWithFCTestGraph : public TestIOGraph, public FCAddGraphlet +{ +public: + FuseAddWithFCTestGraph() = default; + + void init(void) + { + TestIOGraph::init({1, 4}, {1, 16}); + FCAddGraphlet::init(g()); + + _fc->input(input()); + + output()->from(_add); + } +}; + +class FuseAddWithFullyConnectedPassTest : public ::testing::Test +{ +public: + FuseAddWithFCTestGraph g; + luci::FuseAddWithFullyConnectedPass pass; +}; + +} // namespace + +TEST_F(FuseAddWithFullyConnectedPassTest, simple_test) +{ + g.init(); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto fc = dynamic_cast<luci::CircleFullyConnected *>(g.output()->from()); + 
EXPECT_NE(nullptr, fc); + + auto bias = loco::must_cast<luci::CircleConst *>(g.fc()->bias()); + for (uint32_t i = 0; i < bias->size<loco::DataType::FLOAT32>(); i++) + { + EXPECT_EQ(i, bias->at<loco::DataType::FLOAT32>(i)); + } +} diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp index 10c113574..b1cb7a418 100644 --- a/compiler/luci/pass/src/PropagateQuantParamPass.cpp +++ b/compiler/luci/pass/src/PropagateQuantParamPass.cpp @@ -73,7 +73,13 @@ struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool> return copy_qparam(input_node, node); } - // TODO : Add more Ops (e.g., Transpose) + bool visit(luci::CircleTranspose *node) + { + auto input_node = loco::must_cast<luci::CircleNode *>(node->a()); + return copy_qparam(input_node, node); + } + + // TODO : Add more Ops (e.g., layout-changing Ops) }; } // namespace diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp index e99c7b389..c8ad87e3d 100644 --- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp +++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp @@ -358,7 +358,7 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g) // Quantize weights for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeDequantizeWeights qw(_input_dtype, _output_dtype, _granularity); + QuantizeDequantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity); auto circle_node = loco::must_cast<luci::CircleNode *>(node); circle_node->accept(&qw); } diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp index 6afc2084f..be81732f8 100644 --- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp +++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp @@ -609,6 +609,20 @@ struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<v set_act_qparam(node, i_scale, 
i_zp); } + void visit(luci::CircleSplitVOut *node) + { + auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); + auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); + auto i_qparam = input->quantparam(); + assert(i_qparam); + assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS + assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS + auto i_scale = i_qparam->scale[0]; + auto i_zp = i_qparam->zerop[0]; + + set_act_qparam(node, i_scale, i_zp); + } + void visit(luci::CircleUnpackOut *node) { auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input()); @@ -1157,6 +1171,7 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) case luci::CircleOpcode::REVERSE_SEQUENCE: case luci::CircleOpcode::SLICE: case luci::CircleOpcode::SPACE_TO_BATCH_ND: + case luci::CircleOpcode::SPLIT_V: case luci::CircleOpcode::STRIDED_SLICE: case luci::CircleOpcode::SUM: case luci::CircleOpcode::TILE: @@ -1176,6 +1191,7 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type) case luci::CircleOpcode::DIV: case luci::CircleOpcode::ELU: case luci::CircleOpcode::EQUAL: + case luci::CircleOpcode::EXP: case luci::CircleOpcode::FLOOR: case luci::CircleOpcode::FLOOR_DIV: case luci::CircleOpcode::GREATER: @@ -1385,7 +1401,8 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0)); overwrite_quantparam(pad_v2_input, pad_v2); - auto const_value_node = dynamic_cast<luci::CircleConst *>(pad_v2->arg(2)); + auto const_value_node = loco::must_cast<luci::CircleConst *>( + pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS auto new_const = luci::clone(const_value_node); const auto pad_v2_input_qparam = pad_v2_input->quantparam(); @@ -1458,7 +1475,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) // Quantize activation for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeActivation 
qa(_input_dtype, _output_dtype); + QuantizeActivation qa(_input_model_dtype, _output_model_dtype); auto circle_node = loco::must_cast<luci::CircleNode *>(node); circle_node->accept(&qa); } @@ -1466,7 +1483,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) // Quantize weights for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeWeights qw(_input_dtype, _output_dtype, _granularity); + QuantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity); auto circle_node = loco::must_cast<luci::CircleNode *>(node); circle_node->accept(&qw); } @@ -1474,7 +1491,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) // Quantize bias for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeBias qb(_input_dtype, _output_dtype, _granularity); + QuantizeBias qb(_input_model_dtype, _output_model_dtype, _granularity); auto circle_node = loco::must_cast<luci::CircleNode *>(node); circle_node->accept(&qb); } @@ -1491,20 +1508,20 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) // (2) concat has no fused activation function // (3) the input is not concatenation Op // (4) the input is not produced to Ops other than concat - propagate_concat_quantparam(concat, _output_dtype); + propagate_concat_quantparam(concat, _output_model_dtype); } // Quantize const inputs other than weights and bias for (auto node : loco::active_nodes(loco::output_nodes(g))) { auto circle_node = loco::must_cast<luci::CircleNode *>(node); - quantize_const_inputs(circle_node, _output_dtype); + quantize_const_inputs(circle_node, _output_model_dtype); } // Update qparam of output of special Ops for (auto node : loco::active_nodes(loco::output_nodes(g))) { - QuantizeSpecialActivation qsa(_input_dtype, _output_dtype); + QuantizeSpecialActivation qsa(_input_model_dtype, _output_model_dtype); auto circle_node = loco::must_cast<luci::CircleNode *>(node); circle_node->accept(&qsa); } @@ -1514,11 +1531,11 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g) for (auto node : 
loco::output_nodes(g)) { auto circle_node = loco::must_cast<luci::CircleOutput *>(node); - if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype) + if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_model_dtype) { - circle_node->dtype(_output_dtype); + circle_node->dtype(_output_model_dtype); auto graph_output = graph_outputs->at(circle_node->index()); - graph_output->dtype(_output_dtype); + graph_output->dtype(_output_model_dtype); } } diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp index b8cc09955..3a6d86c33 100644 --- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp +++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp @@ -189,6 +189,12 @@ void set_minmax_to_non_const(loco::Graph *g, float min, float max) if (split_node != nullptr) continue; + // Min/Max is not recorded for SplitV + // See MinMaxObserver.cpp in record_minmax module + auto splitv_node = dynamic_cast<luci::CircleSplitV *>(node); + if (splitv_node != nullptr) + continue; + auto circle_node = loco::must_cast<luci::CircleNode *>(node); auto qparam = std::make_unique<luci::CircleQuantParam>(); { @@ -410,6 +416,38 @@ private: luci::CircleConst *_split_dim = nullptr; }; +class SplitVTestGraph final : public luci::test::TestIOGraph +{ +public: + void init(void) + { + TestIOGraph::init({1, 32}, {32}); + _size_splits = create_dummy_const<Type::S32>(g(), {1}); + _split_dim = create_dummy_const<Type::S32>(g(), {1}); + _splitv = g()->nodes()->create<luci::CircleSplitV>(); + { + _splitv->input(input()); + _splitv->size_splits(_size_splits); + _splitv->split_dim(_split_dim); + } + _splitv_o1 = g()->nodes()->create<luci::CircleSplitVOut>(); + { + _splitv_o1->input(_splitv); + _splitv_o1->index(0); + } + + output()->from(_splitv_o1); + + set_minmax_to_non_const(g(), -1, 1); + } + +private: + luci::CircleSplitV *_splitv = nullptr; + luci::CircleSplitVOut *_splitv_o1 = 
nullptr; + luci::CircleConst *_size_splits = nullptr; + luci::CircleConst *_split_dim = nullptr; +}; + class StridedSliceTestGraph final : public SimpleTestGraph { public: @@ -1312,6 +1350,30 @@ TEST(QuantizedModelVerifierTest, Split_wrong_granularity_NEG) SUCCEED(); } +TEST(QuantizedModelVerifierTest, SplitV) +{ + TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_GRAPH(SplitVTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, SplitV_wrong_type_NEG) +{ + TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::LayerWise, Type::S16); + TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::ChannelWise, Type::S16); + TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::S16, Granularity::ChannelWise, Type::U8); + SUCCEED(); +} + +TEST(QuantizedModelVerifierTest, SplitV_wrong_granularity_NEG) +{ + TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::LayerWise); + TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::ChannelWise); + TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::S16, Granularity::ChannelWise); + SUCCEED(); +} + TEST(QuantizedModelVerifierTest, StridedSlice) { TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise); diff --git a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp index 1737e5dd6..9f7e2f17d 100644 --- a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp +++ b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp @@ -16,12 +16,12 @@ #include "luci/Pass/ResolveCustomOpAddPass.h" -#include "flatbuffers/flexbuffers.h" - #include <luci/IR/CircleNodes.h> #include <luci/IR/AttrFusedActFunc.h> #include <luci/Profile/CircleNodeOrigin.h> +#include <flatbuffers/flexbuffers.h> + namespace { diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp 
b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp index 5e9466a63..7ebd7a429 100644 --- a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp +++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp @@ -16,11 +16,11 @@ #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h" -#include "flatbuffers/flexbuffers.h" - #include <luci/IR/CircleNodes.h> #include <luci/Profile/CircleNodeOrigin.h> +#include <flatbuffers/flexbuffers.h> + namespace { diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp index 435016f9d..7ef61c253 100644 --- a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp +++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp @@ -18,12 +18,11 @@ #include <luci/IR/CircleNodes.h> -#include "flatbuffers/flatbuffers.h" -#include "flatbuffers/flexbuffers.h" - #include <luci/test/TestIOGraph.h> #include <gtest/gtest.h> +#include <flatbuffers/flatbuffers.h> +#include <flatbuffers/flexbuffers.h> namespace { diff --git a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp index 216778066..1e8f681c8 100644 --- a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp +++ b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp @@ -16,7 +16,6 @@ #include "luci/Pass/ResolveCustomOpMatMulPass.h" -#include "flatbuffers/flexbuffers.h" #include <loco/IR/DataTypeTraits.h> #include <luci/IR/CircleNodes.h> @@ -25,6 +24,8 @@ #include <loco.h> #include <oops/InternalExn.h> +#include <flatbuffers/flexbuffers.h> + namespace { diff --git a/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp index d78a587ac..f37f27742 100644 --- a/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp +++ b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp @@ -16,7 +16,6 @@ #include 
"luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h" -#include "flatbuffers/flexbuffers.h" #include <loco/IR/DataTypeTraits.h> #include <luci/IR/CircleNodes.h> @@ -25,6 +24,8 @@ #include <loco.h> #include <oops/InternalExn.h> +#include <flatbuffers/flexbuffers.h> + namespace { diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp new file mode 100644 index 000000000..9cba9a9e7 --- /dev/null +++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/SubstituteSplitVToSplitPass.h" + +#include <loco.h> + +#include <luci/IR/CircleNodes.h> +#include <luci/Profile/CircleNodeOrigin.h> + +namespace +{ + +void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src) +{ + auto q = src->quantparam(); + if (q == nullptr) + dst->quantparam(nullptr); + else + dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q)); +} + +// SplitV is substituted to Split if the contents of size_splits are all same +// For example, +// size_splits = [32, 32] -> substitute +// size_splits = [31, 33] -> do not substitute +bool resolve_splitv(luci::CircleSplitV *sv) +{ + auto size_splits = dynamic_cast<luci::CircleConst *>(sv->size_splits()); + if (not size_splits) + return false; + + if (size_splits->dtype() != loco::DataType::S32) + return false; + + auto num_split = size_splits->size<loco::DataType::S32>(); + if (static_cast<int32_t>(num_split) != sv->num_split()) + return false; + + if (num_split < 1) + return false; + + // Check the contents of size_splits are all same + auto first_size = size_splits->at<loco::DataType::S32>(0); + for (uint32_t i = 1; i < num_split; i++) + { + if (first_size != size_splits->at<loco::DataType::S32>(i)) + return false; + } + + auto graph = sv->graph(); + auto split_node = graph->nodes()->create<luci::CircleSplit>(); + split_node->input(sv->input()); + split_node->split_dim(sv->split_dim()); + split_node->num_split(sv->num_split()); + split_node->name(sv->name()); + copy_quantparam(split_node, sv); + luci::add_origin(split_node, luci::get_origin(sv)); + + auto succs = loco::succs(sv); + for (auto succ : succs) + { + auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ); + auto so_node = graph->nodes()->create<luci::CircleSplitOut>(); + so_node->input(split_node); + so_node->index(svo->index()); + so_node->name(svo->name()); + copy_quantparam(so_node, svo); + luci::add_origin(so_node, luci::get_origin(svo)); + + replace(svo).with(so_node); + } + + return true; 
+} + +} // namespace + +namespace luci +{ + +/** + * EXAMPLE (SplitV with num_split = 2) + * + * BEFORE + * [CircleNode] + * | + * [CircleSplitV] (size_splits and split_dim are ignored) + * / \ + * [CircleSplitVOut] [CircleSplitVOut] + * | | + * [CircleNode] [CircleNode] + * + * AFTER + * [CircleNode] + * / \ + * [CircleSplit] [CircleSplitV] (dead) + * / \ \ + * [CircleSplitOut] [CircleSplitOut] [CircleSplitVOut] * 2 (dead) + * | | + * [CircleNode] [CircleNode] + */ +bool SubstituteSplitVToSplitPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto sv = dynamic_cast<luci::CircleSplitV *>(node)) + { + if (resolve_splitv(sv)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp new file mode 100644 index 000000000..6e30103f9 --- /dev/null +++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/SubstituteSplitVToSplitPass.h" + +#include <luci/test/TestIOGraph.h> + +#include <gtest/gtest.h> + +namespace +{ + +using namespace luci::test; + +const int N = 1; +const int C = 32; +const int H = 8; +const int W = 8; + +// Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp +template <typename T> +luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype, + const std::vector<uint32_t> &shape, + const std::vector<T> &values) +{ + auto node = g->nodes()->create<luci::CircleConst>(); + node->dtype(dtype); + node->rank(shape.size()); + + uint32_t size = 1; + for (uint32_t i = 0; i < shape.size(); ++i) + { + node->dim(i) = shape.at(i); + size *= shape.at(i); + } + node->shape_status(luci::ShapeStatus::VALID); + +#define INIT_VALUES(DT) \ + { \ + node->size<DT>(size); \ + for (uint32_t i = 0; i < values.size(); ++i) \ + node->at<DT>(i) = values[i]; \ + } + + switch (dtype) + { + case loco::DataType::U8: + INIT_VALUES(loco::DataType::U8); + break; + case loco::DataType::S16: + INIT_VALUES(loco::DataType::S16); + break; + case loco::DataType::S32: + INIT_VALUES(loco::DataType::S32); + break; + case loco::DataType::FLOAT32: + INIT_VALUES(loco::DataType::FLOAT32) + break; + default: + INTERNAL_EXN("create_const_node called with unsupported type"); + break; + } + return node; +} +/** + * graph having SplitV operator + * + * [CircleInput] + * | + * [CircleSplitV] + * / \ + * [CircleSplitVOut] [CircleSplitVOut] + * | | + * [CircleOutput] [CircleOutput] + */ +class SplitVGraphlet +{ +public: + SplitVGraphlet() = default; + +public: + void init(loco::Graph *g) + { + const std::vector<int32_t> splits{16, 16}; + auto size_splits = create_const_node(g, loco::DataType::S32, {2}, splits); + + const std::vector<int32_t> dim{3}; + auto split_dim = create_const_node(g, loco::DataType::S32, {1}, dim); + + _sv = g->nodes()->create<luci::CircleSplitV>(); + _sv->size_splits(size_splits); + _sv->split_dim(split_dim); + _sv->num_split(2); 
+ _sv->name("SplitV"); + + _svo1 = g->nodes()->create<luci::CircleSplitVOut>(); + _svo1->input(_sv); + _svo1->index(0); + _svo1->name("SplitV0"); + + _svo2 = g->nodes()->create<luci::CircleSplitVOut>(); + _svo2->input(_sv); + _svo2->index(1); + _svo2->name("SplitV1"); + } + +public: + luci::CircleSplitV *split_v() { return _sv; } + luci::CircleSplitVOut *split_vo1() { return _svo1; } + luci::CircleSplitVOut *split_vo2() { return _svo2; } + +protected: + luci::CircleSplitV *_sv = nullptr; + luci::CircleSplitVOut *_svo1 = nullptr; + luci::CircleSplitVOut *_svo2 = nullptr; +}; + +class SplitVGraph : public TestIsGraphlet<1>, public TestOsGraphlet<2>, public SplitVGraphlet +{ +public: + SplitVGraph() = default; + + void init(void) + { + TestIsGraphlet<1>::init(g(), {{N, C, H, W}}); + TestOsGraphlet<2>::init(g(), {{N, C, H / 2, W / 2}, {N, C, H / 2, W / 2}}); + SplitVGraphlet::init(g()); + + split_v()->input(input(0)); + + output(0)->from(split_vo1()); + output(1)->from(split_vo2()); + } +}; + +class SubstituteSplitVToSplitPassTest : public ::testing::Test +{ +public: + SplitVGraph g; + luci::SubstituteSplitVToSplitPass pass; +}; + +} // namespace + +/** + * Optimized graph looks like below. 
+ * + * [CircleInput] + * | + * [CircleSplit] + * / \ + * [CircleSplitOut] [CircleSplitOut] + * | | + * [CircleOutput] [CircleOutput] + */ +TEST_F(SubstituteSplitVToSplitPassTest, simple_test) +{ + g.init(); + + auto ret = pass.run(g.g()); + EXPECT_EQ(true, ret); + + auto so1 = dynamic_cast<luci::CircleSplitOut *>(g.output(0)->from()); + EXPECT_NE(nullptr, so1); + + auto so2 = dynamic_cast<luci::CircleSplitOut *>(g.output(1)->from()); + EXPECT_NE(nullptr, so2); + + EXPECT_EQ(so1->input(), so2->input()); + + auto s = dynamic_cast<luci::CircleSplit *>(so1->input()); + EXPECT_NE(nullptr, s); + + auto input = dynamic_cast<luci::CircleInput *>(s->input()); + EXPECT_NE(nullptr, input); +} + +TEST_F(SubstituteSplitVToSplitPassTest, wrong_condition_NEG) +{ + g.init(); + + g.split_v()->num_split(3); // Wrong num_split + auto ret = pass.run(g.g()); + + EXPECT_EQ(false, ret); +} diff --git a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp index 74be86a4c..f48763782 100644 --- a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp +++ b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp @@ -76,6 +76,18 @@ std::vector<uint32_t> node_shape(const luci::CircleNode *input) } /** + * @brief copy quantparam of src to dst + */ +void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src) +{ + auto q = src->quantparam(); + if (q == nullptr) + dst->quantparam(nullptr); + else + dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q)); +} + +/** * @brief return CircleConst ptr with values of new_shape */ luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape) @@ -130,6 +142,7 @@ bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze) auto graph = squeeze->graph(); auto reshape = graph->nodes()->create<luci::CircleReshape>(); auto shape_const = create_shape_const(graph, reshape_shape); + copy_quantparam(reshape, squeeze); reshape->name(name 
+ "/Reshape"); luci::add_origin(reshape, luci::get_origin(squeeze)); shape_const->name(name + "/Reshape/shape"); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h index 1706b9e43..bf3ff2e8a 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h @@ -324,6 +324,19 @@ private: return true; } + bool visit(const luci::CircleSplitV *node) + { + // node's output is the input of CircleSplitVOut, thus not quantized + RETURN_FALSE_UNLESS(is_lwq(node->input())); + return true; + } + + bool visit(const luci::CircleSplitVOut *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)); + return true; + } + bool visit(const luci::CircleStridedSlice *node) { RETURN_FALSE_UNLESS(is_lwq(node)); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h index 3954bf216..9bc8b31df 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h @@ -310,6 +310,19 @@ private: return true; } + bool visit(const luci::CircleSplitV *node) + { + // node's output is the input of CircleSplitVOut, thus not quantized + RETURN_FALSE_UNLESS(is_lwq(node->input())); + return true; + } + + bool visit(const luci::CircleSplitVOut *node) + { + RETURN_FALSE_UNLESS(is_lwq(node)); + return true; + } + bool visit(const luci::CircleStridedSlice *node) { RETURN_FALSE_UNLESS(is_lwq(node)); diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h index 560abd2ff..eeec7b82b 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h @@ -310,6 +310,26 @@ private: return true; } + bool visit(const luci::CircleSplitV *node) + { + // node's 
output is the input of CircleSplitVOut, thus not quantized + RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16)) + return true; + } + + bool visit(const luci::CircleSplitVOut *node) + { + RETURN_FALSE_UNLESS(has_type(node, Type::S16)) + + // SplitVOut has the same qparam with the input of SplitV + auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); + auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); + return true; + } + bool visit(const luci::CircleStridedSlice *node) { RETURN_FALSE_UNLESS(has_type(node, Type::S16)) diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h index 42cd1ce55..e7dd1b072 100644 --- a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h +++ b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h @@ -317,6 +317,26 @@ private: return true; } + bool visit(const luci::CircleSplitV *node) + { + // node's output is the input of CircleSplitVOut, thus not quantized + RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8)) + return true; + } + + bool visit(const luci::CircleSplitVOut *node) + { + RETURN_FALSE_UNLESS(has_type(node, Type::U8)) + + // SplitVOut has the same qparam with the input of SplitV + auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input()); + auto input = loco::must_cast<luci::CircleNode *>(splitv->input()); + RETURN_FALSE_UNLESS(node->quantparam()); + RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]); + RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]); + return true; + } + bool visit(const luci::CircleStridedSlice *node) { RETURN_FALSE_UNLESS(has_type(node, Type::U8)) diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt 
new file mode 100644 index 000000000..9ca6dcb41 --- /dev/null +++ b/compiler/luci/plan/CMakeLists.txt @@ -0,0 +1,15 @@ +file(GLOB_RECURSE SOURCES "src/*.cpp") + +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_plan ${LIBRARY_TYPE} ${SOURCES}) +target_include_directories(luci_plan PRIVATE src) +target_include_directories(luci_plan PUBLIC include) +target_link_libraries(luci_plan PUBLIC loco) +target_link_libraries(luci_plan PUBLIC luci_lang) + +install(TARGETS luci_plan DESTINATION lib) +install(DIRECTORY include/ DESTINATION include + FILES_MATCHING PATTERN "*.h") diff --git a/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h new file mode 100644 index 000000000..fe966e35e --- /dev/null +++ b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__ +#define __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__ + +#include <luci/IR/CircleNode.h> + +#include <utility> + +namespace luci +{ + +class CircleNodeExecutionPlan +{ +public: + CircleNodeExecutionPlan() = delete; + + CircleNodeExecutionPlan(uint32_t order_in_plan, std::vector<uint32_t> offsets) + { + _order_in_plan = order_in_plan; + _offsets = std::move(offsets); + } + + uint32_t order_in_plan(void) const { return _order_in_plan; } + void order_in_plan(const uint32_t &order_in_plan) { _order_in_plan = order_in_plan; } + + std::vector<uint32_t> offsets(void) const { return _offsets; } + void offsets(const std::vector<uint32_t> &offsets) { _offsets = offsets; } + +private: + uint32_t _order_in_plan = 0; + std::vector<uint32_t> _offsets; +}; + +bool has_execution_plan(const luci::CircleNode *circle_node); + +void add_execution_plan(luci::CircleNode *circle_node, + const luci::CircleNodeExecutionPlan &execution_plan); + +luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node); + +} // namespace luci + +#endif // __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__ diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp new file mode 100644 index 000000000..a02ebc452 --- /dev/null +++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Plan/CircleNodeExecutionPlan.h" + +#include <loco.h> + +#include <stdexcept> +#include <utility> + +namespace +{ + +/** + * @brief Set annotation for circle node execution plan + * @note Once CircleExecutionPlanAnnotation is annotated, it should not be changed. + * If CircleExecutionPlanAnnotation is needed to be changed, create + * new CircleExecutionPlanAnnotation. + */ +class CircleExecutionPlanAnnotation final : public loco::NodeAnnotation +{ +public: + CircleExecutionPlanAnnotation() = delete; + + explicit CircleExecutionPlanAnnotation(luci::CircleNodeExecutionPlan execution_plan) + : _execution_plan{std::move(execution_plan)} + { + // Do nothing + } + +public: + const luci::CircleNodeExecutionPlan &execution_plan(void) const { return _execution_plan; } + // No setter + +private: + luci::CircleNodeExecutionPlan _execution_plan; +}; + +} // namespace + +namespace luci +{ + +bool has_execution_plan(const luci::CircleNode *circle_node) +{ + return circle_node->annot<CircleExecutionPlanAnnotation>() != nullptr; +} + +void add_execution_plan(luci::CircleNode *circle_node, + const luci::CircleNodeExecutionPlan &execution_plan) +{ + circle_node->annot<CircleExecutionPlanAnnotation>(nullptr); + circle_node->annot(std::make_unique<CircleExecutionPlanAnnotation>(execution_plan)); +} + +luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node) +{ + if (!has_execution_plan(circle_node)) + throw std::runtime_error("Cannot find CircleNodeExecutionPlanAnnotation"); + + return circle_node->annot<CircleExecutionPlanAnnotation>()->execution_plan(); +} + +} // namespace luci diff --git a/compiler/luci/profile/CMakeLists.txt b/compiler/luci/profile/CMakeLists.txt index fdfcaf1de..ae604ab90 100644 --- a/compiler/luci/profile/CMakeLists.txt +++ b/compiler/luci/profile/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES 
"src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_profile SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_profile ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_profile PRIVATE src) target_include_directories(luci_profile PUBLIC include) target_link_libraries(luci_profile PUBLIC loco) diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake index 687bf573a..3ccc58128 100644 --- a/compiler/luci/requires.cmake +++ b/compiler/luci/requires.cmake @@ -5,6 +5,7 @@ require("locop") require("logo") require("logo-core") require("mio-circle") +require("mio-tflite") require("oops") require("hermes") require("hermes-std") diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt index 781e6d6de..f48210b9c 100644 --- a/compiler/luci/service/CMakeLists.txt +++ b/compiler/luci/service/CMakeLists.txt @@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") file(GLOB_RECURSE TESTS "src/*.test.cpp") list(REMOVE_ITEM SOURCES ${TESTS}) -add_library(luci_service SHARED ${SOURCES}) +if (NOT LIBRARY_TYPE) + set(LIBRARY_TYPE "SHARED") +endif(NOT LIBRARY_TYPE) + +add_library(luci_service ${LIBRARY_TYPE} ${SOURCES}) target_include_directories(luci_service PRIVATE src) target_include_directories(luci_service PUBLIC include) target_link_libraries(luci_service PUBLIC luci_lang) diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp index fade2cbd0..5f6d46f2b 100644 --- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp +++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp @@ -314,8 +314,7 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT return input_type; } - // TODO support S16 - loco::DataType visit(const luci::CircleQuantize *) final { return loco::DataType::U8; } + loco::DataType visit(const 
luci::CircleQuantize *node) final { return luci::dtype_get(node); } loco::DataType visit(const luci::CircleRange *node) final { diff --git a/compiler/mio-circle/CMakeLists.txt b/compiler/mio-circle/CMakeLists.txt index 9c1126d6f..fa05ef0fa 100644 --- a/compiler/mio-circle/CMakeLists.txt +++ b/compiler/mio-circle/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers QUIET) +nnas_find_package(FlatBuffers EXACT 1.10 QUIET) if(NOT FlatBuffers_FOUND) return() diff --git a/compiler/mio-tflite/CMakeLists.txt b/compiler/mio-tflite/CMakeLists.txt index 9ef2859b9..4660e4003 100644 --- a/compiler/mio-tflite/CMakeLists.txt +++ b/compiler/mio-tflite/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers QUIET) +nnas_find_package(FlatBuffers EXACT 1.10 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)") @@ -36,3 +36,13 @@ target_link_libraries(mio_tflite_example mio_tflite) # TODO provide full tflite validation with runtime/interpreter add_executable(mio_tflite_validate example.cpp) target_link_libraries(mio_tflite_validate mio_tflite) + +nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET) + +if(NOT TensorFlowGEMMLowpSource_FOUND) + return() +endif(NOT TensorFlowGEMMLowpSource_FOUND) + +add_library(mio_tflite_inc INTERFACE) +target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}") +target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}") diff --git a/compiler/mio-tflite260/CMakeLists.txt b/compiler/mio-tflite260/CMakeLists.txt new file mode 100644 index 000000000..39f4d9a31 --- /dev/null +++ b/compiler/mio-tflite260/CMakeLists.txt @@ -0,0 +1,49 @@ +nnas_find_package(FlatBuffers EXACT 1.12 QUIET) + +if(NOT FlatBuffers_FOUND) + message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 1.12)") + return() +endif(NOT FlatBuffers_FOUND) + +nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET) + +if(NOT TensorFlowSource_FOUND) + message(STATUS 
"Build mio-tflite260: FAILED (missing TensorFlowSource 2.6.0)") + return() +endif(NOT TensorFlowSource_FOUND) + +message(STATUS "Build mio-tflite260: TRUE") + +set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs") + +# NOTE Use copy of schema.fbs as to provide unified way for circle also +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs" + COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + DEPENDS "${SCHEMA_FILE}" +) + +FlatBuffers_Target(mio_tflite260 + OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite" + INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen" + SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}" + SCHEMA_FILES "schema.fbs" +) + +add_executable(mio_tflite260_example example.cpp) +target_link_libraries(mio_tflite260_example mio_tflite260) + +# Temporay tflite validation tool to replace nnkit-tflite +# TODO provide full tflite validation with runtime/interpreter +add_executable(mio_tflite260_validate example.cpp) +target_link_libraries(mio_tflite260_validate mio_tflite260) + +nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET) + +if(NOT TensorFlowGEMMLowpSource_FOUND) + return() +endif(NOT TensorFlowGEMMLowpSource_FOUND) + +add_library(mio_tflite260_inc INTERFACE) +target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}") +target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}") diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md new file mode 100644 index 000000000..970569b47 --- /dev/null +++ b/compiler/mio-tflite260/README.md @@ -0,0 +1,3 @@ +# mio-tflite260 + +_mio-tflite260_ provides a library to access TensorFlow lite model files with V2.6.0. 
diff --git a/compiler/mio-tflite260/example.cpp b/compiler/mio-tflite260/example.cpp new file mode 100644 index 000000000..2787a3c2d --- /dev/null +++ b/compiler/mio-tflite260/example.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// This example shows how to include and use "mio-tflite260" +// +#include <mio/tflite/schema_generated.h> + +#include <fstream> +#include <iostream> +#include <vector> + +int main(int argc, char **argv) +{ + std::ifstream ifs(argv[1], std::ios_base::binary); + std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{}); + + flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()}; + + if (!tflite::VerifyModelBuffer(verifier)) + { + std::cout << "Fail" << std::endl; + return 255; + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt index 952857c86..42eb4f8a5 100644 --- a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt +++ b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt @@ -1,4 +1,4 @@ -nnas_find_package(FlatBuffers REQUIRED) +nnas_find_package(FlatBuffers EXACT 1.10 REQUIRED) if (NOT FlatBuffers_FOUND) return() diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt index fc89f4da5..729bfa80a 100644 --- 
a/compiler/one-cmds/CMakeLists.txt +++ b/compiler/one-cmds/CMakeLists.txt @@ -41,7 +41,6 @@ set(ONE_UTILITY_FILES one-build.template.cfg onecc.template.cfg utils.py - conv_mixin_1.8.0.patch ) foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES}) diff --git a/compiler/one-cmds/conv_mixin_1.8.0.patch b/compiler/one-cmds/conv_mixin_1.8.0.patch deleted file mode 100644 index 96a0f41cf..000000000 --- a/compiler/one-cmds/conv_mixin_1.8.0.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- a/onnx_tf/handlers/backend/conv_mixin.py -+++ b/onnx_tf/handlers/backend/conv_mixin.py -@@ -98,7 +98,7 @@ - depthwise = (x_rank == 4 and len(weight_shape) == 4 and group != 1 and - not transpose and not (None in weight_shape)) - if depthwise and isinstance(x_shape, np.ndarray): -- depthwise = group == x_shape[1] -+ depthwise = bool(group == x_shape[1]) - - if depthwise is True: - # Depthwise convolution. diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index f86709489..0a0c4b14c 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -150,11 +150,14 @@ one-optimize provides network or operator transformation shown below. Current transformation options are - disable_validation : This will turn off operator validations. 
+- expand_broadcast_const : This will expand broadcastable constant node inputs - fold_add_v2 : This removes AddV2 operation which can be folded - fold_cast : This removes Cast operation which can be folded - fold_dequantize : This removes Dequantize operation which can be folded +- fold_dwconv : This folds Depthwise Convolution operation which can be folded - fold_sparse_to_dense : This removes SparseToDense operation which can be folded - forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition +- fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible - fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible - fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator - fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator @@ -192,6 +195,8 @@ Current transformation options are - shuffle_weight_to_16x1float32 : This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that it only converts weights whose row is a multiple of 16. - substitute_pack_to_reshape : This will convert single input Pack to Reshape. +- substitute_padv2_to_pad : This will convert certain condition PadV2 to Pad. +- substitute_splitv_to_split : This will convert certain condition SplitV to Split. - substitute_squeeze_to_reshape : This will convert certain condition Squeeze to Reshape. - substitute_strided_slice_to_reshape : This will convert certain condition StridedSlice to Reshape. - substitute_transpose_to_reshape : This will convert certain condition Transpose to Reshape. 
diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen index a496a54ec..726538d44 100644 --- a/compiler/one-cmds/one-codegen +++ b/compiler/one-cmds/one-codegen @@ -28,6 +28,7 @@ import os import subprocess import sys import tempfile +import shutil import utils as _utils @@ -49,6 +50,7 @@ def _get_backends_list(): The list where `one-codegen` finds its backends - `bin` folder where `one-codegen` exists - `backends` folder + - System path NOTE If there are backends of the same name in different places, the closer to the top in the list, the higher the priority. @@ -151,6 +153,10 @@ def main(): if ntpath.basename(cand) == backend_base: codegen_path = cand if not codegen_path: + # Find backend from system path + codegen_path = shutil.which(backend_base) + + if not codegen_path: raise FileNotFoundError(backend_base + ' not found') codegen_cmd = [codegen_path] + backend_args + unknown_args if _utils._is_valid_attr(args, 'command'): diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv index fbc3a75de..285191761 100644 --- a/compiler/one-cmds/one-prepare-venv +++ b/compiler/one-cmds/one-prepare-venv @@ -34,8 +34,8 @@ fi # - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md VER_TENSORFLOW=2.3.0 -VER_ONNX=1.8.0 -VER_ONNX_TF=1.8.0 +VER_ONNX=1.10.1 +VER_ONNX_TF=1.9.0 # Install tensorflow @@ -61,7 +61,7 @@ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW} ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow==6.2.2 # Install PyTorch and ONNX related -${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html +${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html # Provide install of custom onnx-tf if [ -n "${EXT_ONNX_TF_WHL}" ]; then @@ -69,23 +69,3 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then else ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} 
onnx-tf==${VER_ONNX_TF} fi - -# TODO remove this patch after onnx-tf next release -# apply patch for DWConv conversion bug: https://github.com/onnx/onnx-tensorflow/pull/905 -if [[ -z "${EXT_ONNX_TF_WHL}" ]]; then - PY_SITE_PACKAGES=$(${VENV_PYTHON} -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') - if [[ -d ${PY_SITE_PACKAGES} ]]; then - pushd ${PY_SITE_PACKAGES} > /dev/null - PATCH_TARGET_FILE=onnx_tf/handlers/backend/conv_mixin.py - if [[ -f "${PATCH_TARGET_FILE}" ]]; then - # if patch is already applied, error code is 1 - # catch error code and check if this is the case - set +e - patch -t -N -p1 < ${DRIVER_PATH}/conv_mixin_1.8.0.patch - ret_code=$? - [[ $ret_code -gt 1 ]] && exit $ret_code - set -e - fi - popd > /dev/null - fi -fi diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile index 798cc756c..ed6d8bd7a 100644 --- a/compiler/one-cmds/one-profile +++ b/compiler/one-cmds/one-profile @@ -157,14 +157,7 @@ def main(): profile_cmd += getattr(args, 'command').split() # run backend driver - with subprocess.Popen( - profile_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - bufsize=1) as p: - for line in p.stdout: - sys.stdout.buffer.write(line) - sys.stdout.buffer.flush() - if p.returncode != 0: - sys.exit(p.returncode) + _utils._run(profile_cmd, err_prefix=backend_base) if __name__ == '__main__': diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize index 25ef17ab1..cd623a6f8 100644 --- a/compiler/one-cmds/one-quantize +++ b/compiler/one-cmds/one-quantize @@ -88,6 +88,17 @@ def _get_parser(): type=str, help='record mode (supported: percentile/moving_average, default=percentile)') + # arguments for force_quantparam + parser.add_argument( + '--force_quantparam', + action='store_true', + help='write quantparam to the specified tensor') + parser.add_argument( + '--tensor_name', type=str, action='append', help='tensor name (string)') + parser.add_argument('--scale', type=float, action='append', 
help='scale (float)') + parser.add_argument( + '--zero_point', type=int, action='append', help='zero point (int)') + return parser @@ -114,8 +125,22 @@ def _verify_arg(parser, args): missing.append('-i/--input_path') if not _utils._is_valid_attr(args, 'output_path'): missing.append('-o/--output_path') + if _utils._is_valid_attr(args, 'force_quantparam'): + if not _utils._is_valid_attr(args, 'tensor_name'): + missing.append('--tensor_name') + if not _utils._is_valid_attr(args, 'scale'): + missing.append('--scale') + if not _utils._is_valid_attr(args, 'zero_point'): + missing.append('--zero_point') if len(missing): parser.error('the following arguments are required: ' + ' '.join(missing)) + if _utils._is_valid_attr(args, 'force_quantparam'): + tensors = getattr(args, 'tensor_name') + scales = getattr(args, 'scale') + zerops = getattr(args, 'zero_point') + if len(tensors) != len(scales) or len(tensors) != len(zerops): + parser.error( + 'The same number of tensor_name, scale, and zero_point should be given.') def _parse_arg(parser): @@ -128,6 +153,11 @@ def _parse_arg(parser): def _quantize(args): + if _utils._is_valid_attr(args, 'force_quantparam'): + # write quantization parameters + _write_qparam(args) + return + # get file path to log dir_path = os.path.dirname(os.path.realpath(__file__)) logfile_path = os.path.realpath(args.output_path) + '.log' @@ -233,6 +263,43 @@ def _quantize(args): _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) +def _write_qparam(args): + # get file path to log + dir_path = os.path.dirname(os.path.realpath(__file__)) + logfile_path = os.path.realpath(args.output_path) + '.log' + + with open(logfile_path, 'wb') as f: + # get driver path + circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer') + + # make a command to write qparams to the tensors + circle_quantizer_cmd = [circle_quantizer_path] + # verbose + if _utils._is_valid_attr(args, 'verbose'): + circle_quantizer_cmd.append('--verbose') + if 
_utils._is_valid_attr(args, 'tensor_name'): + tensor_name = getattr(args, 'tensor_name') + if _utils._is_valid_attr(args, 'scale'): + scale = getattr(args, 'scale') + if _utils._is_valid_attr(args, 'zero_point'): + zero_point = getattr(args, 'zero_point') + for (t, s, zp) in zip(tensor_name, scale, zero_point): + circle_quantizer_cmd.append('--force_quantparam') + circle_quantizer_cmd.append(t) + circle_quantizer_cmd.append(str(s)) + circle_quantizer_cmd.append(str(zp)) + # input and output path + if _utils._is_valid_attr(args, 'input_path'): + circle_quantizer_cmd.append(getattr(args, 'input_path')) + if _utils._is_valid_attr(args, 'output_path'): + circle_quantizer_cmd.append(getattr(args, 'output_path')) + + f.write((' '.join(circle_quantizer_cmd) + '\n').encode()) + + # run circle-quantizer + _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f) + + def main(): # parse arguments parser = _get_parser() diff --git a/compiler/one-cmds/tests/one-import_neg_002.test b/compiler/one-cmds/tests/one-import_neg_002.test index 738c2cba9..9cf0b1401 100644 --- a/compiler/one-cmds/tests/one-import_neg_002.test +++ b/compiler/one-cmds/tests/one-import_neg_002.test @@ -21,10 +21,16 @@ filename="${filename_ext%.*}" trap_err_onexit() { + # TF2.3.0 if grep -q "is incompatible with result type" "${filename}.log"; then echo "${filename_ext} SUCCESS" exit 0 fi + # TF2.6.0 + if grep -q "is incompatible with body result type" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi echo "${filename_ext} FAILED" exit 255 diff --git a/compiler/one-cmds/tests/one-import_neg_006.test b/compiler/one-cmds/tests/one-import_neg_006.test index 7c63ee3e4..3fb5c7df1 100644 --- a/compiler/one-cmds/tests/one-import_neg_006.test +++ b/compiler/one-cmds/tests/one-import_neg_006.test @@ -45,5 +45,8 @@ one-import tf \ --input_arrays input --input_shapes "0,299,299,3" \ --output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1 -echo "${filename_ext} 
FAILED" -exit 255 +# NOTE TF2.3.0 fails(which is expected) but doesn't for TF2.5(4?) and above +# https://github.com/tensorflow/tensorflow/issues/51756 for details +# TODO exit 255 +echo "${filename_ext} SKIPPED" +exit 0 diff --git a/compiler/one-cmds/tests/one-quantize_005.test b/compiler/one-cmds/tests/one-quantize_005.test new file mode 100644 index 000000000..8449df6ae --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_005.test @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.mat.q8.circle" +outputfile="./inception_v3.one-quantize_005.q8.circle" + +rm -rf ${outputfile} + +# run test with force_quantparam option +one-quantize \ +--force_quantparam \ +--tensor_name input \ +--scale 2.3 \ +--zero_point 33 \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! 
-s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_006.test b/compiler/one-cmds/tests/one-quantize_006.test new file mode 100644 index 000000000..92b9ebebb --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_006.test @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.mat.q8.circle" +outputfile="./inception_v3.one-quantize_006.q8.circle" + +rm -rf ${outputfile} + +# run test with force_quantparam option (multi tensors) +one-quantize \ +--force_quantparam \ +--tensor_name input \ +--scale 2.3 \ +--zero_point 33 \ +--tensor_name InceptionV3/Predictions/Reshape_1 \ +--scale 2.3 \ +--zero_point 33 \ +--input_path ${inputfile} \ +--output_path ${outputfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/one-quantize_neg_018.test b/compiler/one-cmds/tests/one-quantize_neg_018.test new file mode 100644 index 000000000..6937caf4d --- /dev/null +++ b/compiler/one-cmds/tests/one-quantize_neg_018.test @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# negative usage: force_quantparam given without the required zero_point + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + if grep -q "following arguments are required: --zero_point" "${filename}.log"; then + echo "${filename_ext} SUCCESS" + exit 0 + fi + + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +inputfile="./inception_v3.mat.q8.circle" +outputfile="./inception_v3.neg_018.q8.circle" + +rm -rf ${outputfile}.log + +# run test +one-quantize \ +--force_quantparam \ +--tensor_name input \ +--scale 2.3 \ +--input_path ${inputfile} \ +--output_path ${outputfile} > ${filename}.log 2>&1 + +echo "${filename_ext} FAILED" +exit 255 diff --git a/compiler/one-cmds/tests/onecc_022.cfg b/compiler/one-cmds/tests/onecc_022.cfg new file mode 100644 index 000000000..9741d5173 --- /dev/null +++ b/compiler/one-cmds/tests/onecc_022.cfg @@ -0,0 +1,18 @@ +[onecc] +one-import-tf=False +one-import-tflite=False +one-import-bcq=False +one-import-onnx=False +one-optimize=False +one-quantize=True +one-pack=False +one-codegen=False +one-profile=False + +[one-quantize] +input_path=inception_v3.mat.q8.circle +output_path=inception_v3.onecc_022.q8.circle +force_quantparam=True +tensor_name=input +scale=2.1 +zero_point=45 diff --git a/compiler/one-cmds/tests/onecc_022.test b/compiler/one-cmds/tests/onecc_022.test new file mode 100644 index 000000000..3aaa26fea --- /dev/null +++ 
b/compiler/one-cmds/tests/onecc_022.test @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# one-quantize + +filename_ext="$(basename -- $0)" +filename="${filename_ext%.*}" + +trap_err_onexit() +{ + echo "${filename_ext} FAILED" + exit 255 +} + +trap trap_err_onexit ERR + +configfile="onecc_022.cfg" +outputfile="inception_v3.onecc_022.q8.circle" + +rm -rf ${outputfile} + +# run test +onecc -C ${configfile} > /dev/null 2>&1 + +if [[ ! -s "${outputfile}" ]]; then + trap_err_onexit +fi + +echo "${filename_ext} SUCCESS" diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh index 694651d74..7f269530c 100644 --- a/compiler/one-cmds/tests/prepare_test_materials.sh +++ b/compiler/one-cmds/tests/prepare_test_materials.sh @@ -103,4 +103,14 @@ if [[ ! -s ${outputfile} ]]; then --output_arrays InceptionV3/Predictions/Reshape_1 fi +# prepare 'inception_v3.mat.q8.circle' file used for quantization test +inputfile="./inception_v3.circle" +outputfile="./inception_v3.mat.q8.circle" + +if [[ ! 
-s ${outputfile} ]]; then + ../bin/one-quantize \ + --input_path ${inputfile} \ + --output_path ${outputfile} +fi + popd > /dev/null diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py index f18dc6f56..efb01a210 100644 --- a/compiler/one-cmds/utils.py +++ b/compiler/one-cmds/utils.py @@ -29,6 +29,7 @@ class _CONSTANT: ('convert_nchw_to_nhwc', 'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.' ), + ('expand_broadcast_const', 'expand broadcastable constant node inputs'), ('nchw_to_nhwc_input_shape', 'convert the input shape of the model (argument for convert_nchw_to_nhwc)'), ('nchw_to_nhwc_output_shape', @@ -36,9 +37,11 @@ class _CONSTANT: ('fold_add_v2', 'fold AddV2 op with constant inputs'), ('fold_cast', 'fold Cast op with constant input'), ('fold_dequantize', 'fold Dequantize op'), + ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'), ('fold_sparse_to_dense', 'fold SparseToDense op'), ('forward_reshape_to_unaryop', 'Forward Reshape op'), ('fuse_add_with_tconv', 'fuse Add op to Transposed'), + ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'), ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'), ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'), ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'), @@ -74,6 +77,8 @@ class _CONSTANT: 'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.' 
' Note that it only converts weights whose row is a multiple of 16'), ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'), + ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'), + ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'), ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'), ('substitute_strided_slice_to_reshape', 'convert certain condition StridedSlice to Reshape'), @@ -107,6 +112,14 @@ def _add_default_arg(parser): parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS) +def is_accumulated_arg(arg, driver): + if driver == "one-quantize": + if arg == "tensor_name" or arg == "scale" or arg == "zero_point": + return True + + return False + + def _is_valid_attr(args, attr): return hasattr(args, attr) and getattr(args, attr) @@ -124,6 +137,12 @@ def _parse_cfg(args, driver_name): raise AssertionError('configuration file must have \'' + driver_name + '\' section') for key in config[args.section]: + if is_accumulated_arg(key, driver_name): + if not _is_valid_attr(args, key): + setattr(args, key, [config[args.section][key]]) + else: + getattr(args, key).append(config[args.section][key]) + continue if not _is_valid_attr(args, key): setattr(args, key, config[args.section][key]) # if section is not given, section name is same with its driver name @@ -133,6 +152,12 @@ def _parse_cfg(args, driver_name): '\' section') secton_to_run = driver_name for key in config[secton_to_run]: + if is_accumulated_arg(key, driver_name): + if not _is_valid_attr(args, key): + setattr(args, key, [config[secton_to_run][key]]) + else: + getattr(args, key).append(config[secton_to_run][key]) + continue if not _is_valid_attr(args, key): setattr(args, key, config[secton_to_run][key]) @@ -242,33 +267,26 @@ def _run(cmd, err_prefix=None, logfile=None): err_prefix: prefix to be put before every stderr lines logfile: file stream to which both of stdout and stderr lines will be 
written """ - if logfile == None: - with subprocess.Popen(cmd, stderr=subprocess.PIPE, bufsize=1) as p: - for line in p.stderr: - if err_prefix: - line = f"{err_prefix}: ".encode() + line - sys.stderr.buffer.write(line) - sys.stderr.buffer.flush() - else: - with subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p: - import select - inputs = set([p.stdout, p.stderr]) - while inputs: - readable, _, _ = select.select(inputs, [], []) - for x in readable: - line = x.readline() - if len(line) == 0: - inputs.discard(x) - continue - if x == p.stdout: - out = sys.stdout - if x == p.stderr: - out = sys.stderr - if err_prefix: - line = f"{err_prefix}: ".encode() + line - out.buffer.write(line) - out.buffer.flush() + with subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p: + import select + inputs = set([p.stdout, p.stderr]) + while inputs: + readable, _, _ = select.select(inputs, [], []) + for x in readable: + line = x.readline() + if len(line) == 0: + inputs.discard(x) + continue + if x == p.stdout: + out = sys.stdout + if x == p.stderr: + out = sys.stderr + if err_prefix: + line = f"{err_prefix}: ".encode() + line + out.buffer.write(line) + out.buffer.flush() + if logfile != None: logfile.write(line) if p.returncode != 0: sys.exit(p.returncode) diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt index 80661e566..00ffb57de 100644 --- a/compiler/pota-quantization-value-test/CMakeLists.txt +++ b/compiler/pota-quantization-value-test/CMakeLists.txt @@ -1,7 +1,7 @@ unset(QUANTIZATION_VALUE_TEST) unset(QUANTIZATION_VALUE_TEST_WITH_PARAM) -nnas_find_package(FlatBuffers QUIET) +nnas_find_package(FlatBuffers EXACT 1.10 QUIET) if(NOT FlatBuffers_FOUND) message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)") return() @@ -25,7 +25,7 @@ get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR) 
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py" "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY) -set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_3_0") +set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_6_0") ### ### Generate test.config diff --git a/compiler/pota-quantization-value-test/requires.cmake b/compiler/pota-quantization-value-test/requires.cmake index 883a925df..4eb7204e1 100644 --- a/compiler/pota-quantization-value-test/requires.cmake +++ b/compiler/pota-quantization-value-test/requires.cmake @@ -2,3 +2,4 @@ require("record-minmax") require("circle-quantizer") require("circle-tensordump") require("common-artifacts") +require("mio-circle") diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt index ba019865f..6ba55c357 100644 --- a/compiler/tfl-inspect/CMakeLists.txt +++ b/compiler/tfl-inspect/CMakeLists.txt @@ -10,5 +10,5 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES}) target_include_directories(tfl-inspect PRIVATE src) target_link_libraries(tfl-inspect arser) target_link_libraries(tfl-inspect foder) -target_link_libraries(tfl-inspect mio_tflite) +target_link_libraries(tfl-inspect mio_tflite260) target_link_libraries(tfl-inspect safemain) diff --git a/compiler/tfl-inspect/requires.cmake b/compiler/tfl-inspect/requires.cmake index 25857ad2b..9a7477b81 100644 --- a/compiler/tfl-inspect/requires.cmake +++ b/compiler/tfl-inspect/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-tflite") +require("mio-tflite260") require("safemain") diff --git a/compiler/tfl-inspect/src/Reader.cpp b/compiler/tfl-inspect/src/Reader.cpp index 5be289446..41a8396bb 100644 --- a/compiler/tfl-inspect/src/Reader.cpp +++ b/compiler/tfl-inspect/src/Reader.cpp @@ -16,21 +16,34 @@ #include "Reader.h" +#include <cassert> #include <sstream> #include <string> namespace tflinspect { +// This will provide v3/v3a format neutral BuiltinOperator +tflite::BuiltinOperator builtin_code_neutral(const 
tflite::OperatorCode *opcode) +{ + assert(opcode != nullptr); + int8_t dp_code = opcode->deprecated_builtin_code(); + // 127 is max of int8_t which is upper bound of v3 builtin_code + // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127 + if (dp_code < 127 && dp_code >= 0) + return tflite::BuiltinOperator(dp_code); + return opcode->builtin_code(); +} + bool is_valid(const tflite::OperatorCode *opcode) { - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX); } bool is_custom(const tflite::OperatorCode *opcode) { - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return (code == tflite::BuiltinOperator_CUSTOM); } @@ -56,7 +69,7 @@ std::string opcode_name(const tflite::OperatorCode *opcode) return custom_op; } - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return tflite::EnumNameBuiltinOperator(code); } @@ -122,7 +135,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - return opcode->builtin_code(); + return tflinspect::builtin_code_neutral(opcode); } std::string Reader::opcode_name(const tflite::Operator *op) const diff --git a/compiler/tfl-inspect/src/Reader.h b/compiler/tfl-inspect/src/Reader.h index e9e182a4b..91b7bb940 100644 --- a/compiler/tfl-inspect/src/Reader.h +++ b/compiler/tfl-inspect/src/Reader.h @@ -36,6 +36,7 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } +tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode); bool is_valid(const tflite::OperatorCode *opcode); bool is_custom(const tflite::OperatorCode *opcode); std::string 
opcode_name(const tflite::OperatorCode *opcode); diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt index 4421a4660..a87d30c5e 100644 --- a/compiler/tfl-verify/CMakeLists.txt +++ b/compiler/tfl-verify/CMakeLists.txt @@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES}) target_include_directories(tfl-verify PRIVATE src) target_link_libraries(tfl-verify arser) target_link_libraries(tfl-verify foder) -target_link_libraries(tfl-verify mio_tflite) +target_link_libraries(tfl-verify mio_tflite260) target_link_libraries(tfl-verify safemain) target_link_libraries(tfl-verify cwrap) diff --git a/compiler/tfl-verify/requires.cmake b/compiler/tfl-verify/requires.cmake index 79503f325..72803d890 100644 --- a/compiler/tfl-verify/requires.cmake +++ b/compiler/tfl-verify/requires.cmake @@ -1,5 +1,5 @@ require("arser") require("foder") -require("mio-tflite") +require("mio-tflite260") require("safemain") require("cwrap") diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt index ebc873342..ac7fe4b7c 100644 --- a/compiler/tflchef/CMakeLists.txt +++ b/compiler/tflchef/CMakeLists.txt @@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND) return() endif(NOT Protobuf_FOUND) -if(NOT TARGET mio_tflite) - message(STATUS "Build tflchef: FAILED (missing mio_tflite)") +if(NOT TARGET mio_tflite260) + message(STATUS "Build tflchef: FAILED (missing mio_tflite260)") return() -endif(NOT TARGET mio_tflite) +endif(NOT TARGET mio_tflite260) # Recipe Parser add_subdirectory(proto) diff --git a/compiler/tflchef/core/CMakeLists.txt b/compiler/tflchef/core/CMakeLists.txt index 43f6b8b03..413b78b15 100644 --- a/compiler/tflchef/core/CMakeLists.txt +++ b/compiler/tflchef/core/CMakeLists.txt @@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include) target_include_directories(tflchef_core PRIVATE src) target_link_libraries(tflchef_core tflchef_proto) target_link_libraries(tflchef_core tflchef_log) -target_link_libraries(tflchef_core mio_tflite) 
+target_link_libraries(tflchef_core mio_tflite260) target_link_libraries(tflchef_core souschef) diff --git a/compiler/tflchef/core/src/CustomOp/AddV2.cpp b/compiler/tflchef/core/src/CustomOp/AddV2.cpp index dffd336cd..557c20bce 100644 --- a/compiler/tflchef/core/src/CustomOp/AddV2.cpp +++ b/compiler/tflchef/core/src/CustomOp/AddV2.cpp @@ -17,7 +17,7 @@ #include "AddV2.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> AddV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/CustomOp/All.cpp b/compiler/tflchef/core/src/CustomOp/All.cpp index b3ae821a4..bbef5ecaa 100644 --- a/compiler/tflchef/core/src/CustomOp/All.cpp +++ b/compiler/tflchef/core/src/CustomOp/All.cpp @@ -17,7 +17,7 @@ #include "All.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> AllChef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp b/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp index 595f3b9bb..6d2c5b13b 100644 --- a/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp +++ b/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp @@ -17,7 +17,7 @@ #include "BatchMatMulV2.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> BatchMatMulV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp b/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp index fc429e2f7..dd458b376 100644 --- a/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp +++ b/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp @@ -17,7 +17,7 @@ #include "BroadcastTo.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> BroadcastToChef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/CustomOp/MatMul.cpp 
b/compiler/tflchef/core/src/CustomOp/MatMul.cpp index ba34aa8db..e7c707d37 100644 --- a/compiler/tflchef/core/src/CustomOp/MatMul.cpp +++ b/compiler/tflchef/core/src/CustomOp/MatMul.cpp @@ -17,7 +17,7 @@ #include "MatMul.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> MatMulChef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp b/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp index d12597edb..b25003227 100644 --- a/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp +++ b/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp @@ -17,7 +17,7 @@ #include "MatrixBandPart.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> MatrixBandPartChef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp index 9dacf7bf6..290d3c2ca 100644 --- a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp +++ b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp @@ -17,7 +17,7 @@ #include "MaxPoolWithArgmax.h" -#include "flatbuffers/flexbuffers.h" +#include <flatbuffers/flexbuffers.h> flatbuffers::Offset<void> MaxPoolWithArgmaxChef::value(flatbuffers::FlatBufferBuilder &fbb) const { diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp index aba20dcbf..7028bd9ac 100644 --- a/compiler/tflchef/core/src/ModelChef.cpp +++ b/compiler/tflchef/core/src/ModelChef.cpp @@ -582,8 +582,11 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) for (auto const &opcode : builtin_code_map) { tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder}; - code_builder.add_builtin_code(opcode.first); + // TODO support for opcode.first >= 127 + assert(opcode.first < 127); + code_builder.add_deprecated_builtin_code(opcode.first); 
code_builder.add_version(opcode.second); + code_builder.add_builtin_code(opcode.first); auto code = code_builder.Finish(); // Update OperatorCode vector code_vec.emplace_back(code); @@ -597,8 +600,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe) { auto custom_code = flatbuffer_builder->CreateString(opcode); tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder}; - code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM); + code_builder.add_deprecated_builtin_code(tflite::BuiltinOperator_CUSTOM); code_builder.add_custom_code(custom_code); + code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM); auto code = code_builder.Finish(); // Update OperatorCode vector code_vec.emplace_back(code); diff --git a/compiler/tflchef/requires.cmake b/compiler/tflchef/requires.cmake index 4c02174b5..78bfa2d07 100644 --- a/compiler/tflchef/requires.cmake +++ b/compiler/tflchef/requires.cmake @@ -1,7 +1,7 @@ require("arser") require("nnkit") require("cwrap") -require("mio-tflite") +require("mio-tflite260") require("safemain") require("hermes") require("hermes-std") diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt index ce8b8c463..3c4c3fff6 100644 --- a/compiler/tflchef/tflite/CMakeLists.txt +++ b/compiler/tflchef/tflite/CMakeLists.txt @@ -4,6 +4,6 @@ add_library(tflchef_tflite STATIC ${SOURCES}) target_include_directories(tflchef_tflite PUBLIC include) target_include_directories(tflchef_tflite PRIVATE src) target_link_libraries(tflchef_tflite tflchef_proto) -target_link_libraries(tflchef_tflite mio_tflite) +target_link_libraries(tflchef_tflite mio_tflite260) target_link_libraries(tflchef_tflite cwrap) target_link_libraries(tflchef_tflite souschef) diff --git a/compiler/tflchef/tflite/src/TFliteImport.cpp b/compiler/tflchef/tflite/src/TFliteImport.cpp index 51d9b5ffa..1462ee7f4 100644 --- a/compiler/tflchef/tflite/src/TFliteImport.cpp +++ b/compiler/tflchef/tflite/src/TFliteImport.cpp @@ -38,15 +38,27 @@ 
const char *tensor_name(const tflite::Tensor *tensor) return kEmptyTensorName; } +// This will provide v3/v3a format neutral BuiltinOperator +tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) +{ + assert(opcode != nullptr); + int8_t dp_code = opcode->deprecated_builtin_code(); + // 127 is max of int8_t which is upper bound of v3 builtin_code + // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127 + if (dp_code < 127 && dp_code >= 0) + return tflite::BuiltinOperator(dp_code); + return opcode->builtin_code(); +} + bool is_valid(const tflite::OperatorCode *opcode) { - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX); } bool is_custom(const tflite::OperatorCode *opcode) { - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return (code == tflite::BuiltinOperator_CUSTOM); } @@ -92,7 +104,7 @@ tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - return opcode->builtin_code(); + return builtin_code_neutral(opcode); } std::string TFliteImport::opcode_name(const tflite::Operator *op) const @@ -116,7 +128,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const return opcode->custom_code()->c_str(); } - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return EnumNameBuiltinOperator(code); } diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h index 9d0a642ab..43b5bbaff 100644 --- a/compiler/tflchef/tflite/src/TFliteImport.h +++ b/compiler/tflchef/tflite/src/TFliteImport.h @@ -36,6 +36,7 @@ using TFliteOperators_t = 
flatbuffers::Vector<flatbuffers::Offset<tflite::Operat const char *tensor_type(const tflite::Tensor *tensor); const char *tensor_name(const tflite::Tensor *tensor); +tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode); bool is_valid(const tflite::OperatorCode *opcode); bool is_custom(const tflite::OperatorCode *opcode); diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt index e6afcb6d2..83f7febad 100644 --- a/compiler/tfldump/CMakeLists.txt +++ b/compiler/tfldump/CMakeLists.txt @@ -1,7 +1,7 @@ -if(NOT TARGET mio_tflite) - message(STATUS "Build tfldump: FAILED (missing mio_tflite)") +if(NOT TARGET mio_tflite260) + message(STATUS "Build tfldump: FAILED (missing mio_tflite260)") return() -endif(NOT TARGET mio_tflite) +endif(NOT TARGET mio_tflite260) set(DRIVER "driver/Driver.cpp") @@ -10,6 +10,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp") add_executable(tfldump ${DRIVER} ${SOURCES}) target_include_directories(tfldump PRIVATE include) target_link_libraries(tfldump arser) -target_link_libraries(tfldump mio_tflite) +target_link_libraries(tfldump mio_tflite260) target_link_libraries(tfldump safemain) -target_link_libraries(tfldump flatbuffers) +target_link_libraries(tfldump flatbuffers-1.12) diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake index 2cdd3a391..d0f9cccba 100644 --- a/compiler/tfldump/requires.cmake +++ b/compiler/tfldump/requires.cmake @@ -1,3 +1,3 @@ require("arser") -require("mio-tflite") +require("mio-tflite260") require("safemain") diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp index 20e1343e6..7a480bc52 100644 --- a/compiler/tfldump/src/Dump.cpp +++ b/compiler/tfldump/src/Dump.cpp @@ -350,6 +350,7 @@ void dump_model(std::ostream &os, const tflite::Model *model) auto opcodes = reader.opcodes(); auto buffers = reader.buffers(); auto metadata = reader.metadata(); + auto signaturedefs = reader.signaturedefs(); // dump operator_codes os << "Operator 
Codes: [order] OpCodeName (OpCode Enum)" << std::endl; @@ -357,11 +358,13 @@ void dump_model(std::ostream &os, const tflite::Model *model) for (auto opcode : opcodes) { tflite::BuiltinOperator op_code = opcode->builtin_code(); + tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code()); + auto op_name = tflread::opcode_name(opcode); auto op_version = opcode->version(); os << "[" << opcode_index << "] " << op_name << " (code: " << op_code - << ", version: " << op_version << ")" << std::endl; + << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl; opcode_index++; } @@ -389,7 +392,38 @@ void dump_model(std::ostream &os, const tflite::Model *model) os << "metadata : B(index) name" << std::endl; for (uint32_t i = 0; i < metadata->Length(); ++i) { - os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str(); + os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str() + << std::endl; + } + os << std::endl; + } + + // dump signaturedef + if (signaturedefs != nullptr) + { + os << "SignatureDef" << std::endl; + for (uint32_t i = 0; i < signaturedefs->Length(); ++i) + { + auto sign_i = signaturedefs->Get(i); + os << "S(" << i << ") " << sign_i->method_name()->c_str() << ", key(" + << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")" + << std::endl; + + auto inputs_i = sign_i->inputs(); + for (uint32_t t = 0; t < inputs_i->Length(); ++t) + { + auto inputs_i_t = inputs_i->Get(t); + os << " I T(" << t << ") " << inputs_i_t->name()->c_str() << ": " + << inputs_i_t->tensor_index() << std::endl; + } + + auto outputs_i = sign_i->outputs(); + for (uint32_t t = 0; t < outputs_i->Length(); ++t) + { + auto outputs_i_t = outputs_i->Get(t); + os << " O T(" << t << ") " << outputs_i_t->name()->c_str() << ": " + << outputs_i_t->tensor_index() << std::endl; + } } os << std::endl; } diff --git a/compiler/tfldump/src/Read.cpp 
b/compiler/tfldump/src/Read.cpp index 856cc5699..8b3a96e83 100644 --- a/compiler/tfldump/src/Read.cpp +++ b/compiler/tfldump/src/Read.cpp @@ -22,15 +22,25 @@ namespace tflread { +// This will provide v3/v3a format neutral BuiltinOperator +tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) +{ + assert(opcode != nullptr); + int8_t dp_code = opcode->deprecated_builtin_code(); + if (dp_code < 127 && dp_code >= 0) + return tflite::BuiltinOperator(dp_code); + return opcode->builtin_code(); +} + bool is_valid(const tflite::OperatorCode *opcode) { - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX); } bool is_custom(const tflite::OperatorCode *opcode) { - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return (code == tflite::BuiltinOperator_CUSTOM); } @@ -56,7 +66,7 @@ std::string opcode_name(const tflite::OperatorCode *opcode) return custom_op; } - tflite::BuiltinOperator code = opcode->builtin_code(); + tflite::BuiltinOperator code = builtin_code_neutral(opcode); return tflite::EnumNameBuiltinOperator(code); } @@ -82,6 +92,7 @@ Reader::Reader(const tflite::Model *model) _subgraphs = model->subgraphs(); _buffers = model->buffers(); _metadata = model->metadata(); + _signaturedefs = model->signature_defs(); auto opcodes = model->operator_codes(); for (const ::tflite::OperatorCode *opcode : *opcodes) @@ -118,7 +129,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const assert(index < _op_codes.size()); const tflite::OperatorCode *opcode = _op_codes.at(index); - return opcode->builtin_code(); + return tflread::builtin_code_neutral(opcode); } std::string Reader::opcode_name(const tflite::Operator *op) const diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h index 
f835be140..80f317d0b 100644 --- a/compiler/tfldump/src/Read.h +++ b/compiler/tfldump/src/Read.h @@ -36,6 +36,7 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T return ret; } +tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode); bool is_valid(const tflite::OperatorCode *opcode); bool is_custom(const tflite::OperatorCode *opcode); std::string opcode_name(const tflite::OperatorCode *opcode); @@ -53,6 +54,7 @@ private: using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>; using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>; using TFliteMetadata_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>; + using TFliteSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>; public: Reader(const tflite::Model *model); @@ -69,6 +71,7 @@ public: const std::vector<int32_t> &inputs() const { return _inputs; } const std::vector<int32_t> &outputs() const { return _outputs; } const TFliteMetadata_t *metadata() const { return _metadata; } + const TFliteSignatureDef_t *signaturedefs() const { return _signaturedefs; } uint32_t num_subgraph() const { return _subgraphs->Length(); } @@ -89,6 +92,7 @@ private: const TFliteTensors_t *_tensors{nullptr}; const TFliteOperators_t *_operators{nullptr}; const TFliteMetadata_t *_metadata{nullptr}; + const TFliteSignatureDef_t *_signaturedefs{nullptr}; uint32_t _subgraph_index; std::string _subgraph_name; diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt index 3e46dd803..4ea01ad31 100644 --- a/compiler/tflite2circle/CMakeLists.txt +++ b/compiler/tflite2circle/CMakeLists.txt @@ -1,7 +1,7 @@ nnas_include(TargetRequire) unset(REQUIRED_TARGETS) -list(APPEND REQUIRED_TARGETS mio_tflite) +list(APPEND REQUIRED_TARGETS mio_tflite260) list(APPEND REQUIRED_TARGETS mio_circle) TargetRequire_Return(${REQUIRED_TARGETS}) @@ -11,8 +11,9 @@ add_executable(tflite2circle 
${DRIVER} ${SOURCES}) target_include_directories(tflite2circle PRIVATE include) target_include_directories(tflite2circle PRIVATE src) target_link_libraries(tflite2circle arser) +target_link_libraries(tflite2circle foder) target_link_libraries(tflite2circle safemain) -target_link_libraries(tflite2circle mio_tflite) +target_link_libraries(tflite2circle mio_tflite260) target_link_libraries(tflite2circle mio_circle) target_link_libraries(tflite2circle vconone) target_link_libraries(tflite2circle nncc_coverage) diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp index fc7ee4042..4015631ab 100644 --- a/compiler/tflite2circle/driver/Driver.cpp +++ b/compiler/tflite2circle/driver/Driver.cpp @@ -70,9 +70,9 @@ int entry(int argc, char **argv) std::string circle_path = arser.get<std::string>("circle"); // read tflite file tflite2circle::TFLModel tfl_model(tfl_path); - if (!tfl_model.is_valid()) + if (not tfl_model.verify_data()) { - std::cerr << "ERROR: Failed to load tflite '" << tfl_path << "'" << std::endl; + std::cerr << "ERROR: Failed to verify tflite '" << tfl_path << "'" << std::endl; return 255; } @@ -80,7 +80,7 @@ int entry(int argc, char **argv) auto flatbuffer_builder = std::make_unique<flatbuffers::FlatBufferBuilder>(1024); // convert tflite to circle - tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model}; + tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model.get_model()}; std::ofstream outfile{circle_path, std::ios::binary}; diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h index e1e35d8ff..14c4f1c12 100644 --- a/compiler/tflite2circle/include/CircleModel.h +++ b/compiler/tflite2circle/include/CircleModel.h @@ -63,12 +63,17 @@ private: public: Offset(void) = delete; - Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec); + Offset(FlatBufBuilder &fb) : _fb{fb} {}; + +public: + // TODO use _fb + void 
build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec); public: CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; } private: + FlatBufBuilder &_fb; CIRFlatBufVecOffset _circle_flatbuffer_vec_offset; }; @@ -79,7 +84,7 @@ private: public: CircleModel(void) = delete; - CircleModel(FlatBufBuilder &fb, TFLModel &tfl_model); + CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model); public: void model_build(void) const; diff --git a/compiler/tflite2circle/include/TFLModel.h b/compiler/tflite2circle/include/TFLModel.h index e53d62749..507667bb9 100644 --- a/compiler/tflite2circle/include/TFLModel.h +++ b/compiler/tflite2circle/include/TFLModel.h @@ -37,15 +37,14 @@ public: TFLModel(const std::string &path); public: - bool is_valid(void) { return _valid; } + const tflite::Model *get_model(void); -private: - const tflite::Model *load_model(void); +public: + bool verify_data(void); private: std::ifstream _infile; DataBuffer _data; - bool _valid; friend class CircleModel; }; diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake index 837c287b6..e39f9eeaf 100644 --- a/compiler/tflite2circle/requires.cmake +++ b/compiler/tflite2circle/requires.cmake @@ -1,5 +1,6 @@ require("arser") -require("mio-tflite") +require("foder") +require("mio-tflite260") require("mio-circle") require("safemain") require("vconone") diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp index ab0b5b507..4249f1560 100644 --- a/compiler/tflite2circle/src/CircleModel.cpp +++ b/compiler/tflite2circle/src/CircleModel.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include <cassert> #include <iostream> #include <memory> @@ -24,7 +25,8 @@ namespace tflite2circle { template <> -Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +void Offset<MetaDataBufferLink>::build(FlatBufBuilder &fb, + const TFLFlatBufVec *tflite_flatbuffer_vec) { if (tflite_flatbuffer_vec == nullptr) return; @@ -34,7 +36,7 @@ Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tfli } template <> -Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec; @@ -55,7 +57,7 @@ Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatb } template <> -Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec; @@ -278,8 +280,19 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec); } +tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode) +{ + assert(opcode != nullptr); + int8_t dp_code = opcode->deprecated_builtin_code(); + // 127 is max of int8_t which is upper bound of v3 builtin_code + // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127 + if (dp_code < 127 && dp_code >= 0) + return tflite::BuiltinOperator(dp_code); + return opcode->builtin_code(); +} + template <> -Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) +void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec) { std::vector<flatbuffers::Offset<circle::OperatorCode>> 
operator_code_vec; @@ -287,7 +300,9 @@ Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite { auto custom_code = fb->CreateString(it->custom_code()); circle::OperatorCodeBuilder operator_code_builder{*fb}; - operator_code_builder.add_builtin_code(get_circle_builtin_code(it->builtin_code())); + // TODO support circle deprecated_builtin_code + auto bt_code = builtin_code_neutral(it); + operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code)); operator_code_builder.add_custom_code(custom_code); operator_code_builder.add_version(it->version()); auto code = operator_code_builder.Finish(); @@ -296,24 +311,19 @@ Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec); } -CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model) - : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb} +CircleModel::CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model) + : _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb} { - const tflite::Model *tfl_model = model.load_model(); - // verify flatbuffers - flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()), - model._data.size()}; - if (!tflite::VerifyModelBuffer(verifier)) - { - throw std::runtime_error("Failed to verify tflite"); - } + _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(fb); + _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb); + _buffers_offset = std::make_unique<Offset<BufferLink>>(fb); + _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(fb); + + _operator_codes_offset->build(fb, tfl_model->operator_codes()); + _subGraphs_offset->build(fb, tfl_model->subgraphs()); + _buffers_offset->build(fb, tfl_model->buffers()); + _metadata_buffer_offset->build(fb, tfl_model->metadata_buffer()); - _operator_codes_offset = - std::make_unique<Offset<OperatorCodeLink>>(fb, 
tfl_model->operator_codes()); - _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs()); - _buffers_offset = std::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers()); - _metadata_buffer_offset = - std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer()); model_build(); } diff --git a/compiler/tflite2circle/src/TFLModel.cpp b/compiler/tflite2circle/src/TFLModel.cpp index 33f11fb83..470b1aec7 100644 --- a/compiler/tflite2circle/src/TFLModel.cpp +++ b/compiler/tflite2circle/src/TFLModel.cpp @@ -16,6 +16,8 @@ #include <iostream> +#include <foder/FileLoader.h> + #include "TFLModel.h" namespace tflite2circle @@ -23,21 +25,21 @@ namespace tflite2circle TFLModel::TFLModel(const std::string &path) { - _infile.open(path, std::ios::binary | std::ios::in); - _valid = _infile.good(); + foder::FileLoader file_loader{path}; + _data = file_loader.load(); } -const tflite::Model *TFLModel::load_model(void) +bool TFLModel::verify_data(void) { - assert(_valid == true); - _infile.seekg(0, std::ios::end); - auto fileSize = _infile.tellg(); - _infile.seekg(0, std::ios::beg); - _data.resize(fileSize); - _infile.read(_data.data(), fileSize); - _infile.close(); - - return tflite::GetModel(_data.data()); + // verify flatbuffers + flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(_data.data()), _data.size()}; + if (not tflite::VerifyModelBuffer(verifier)) + { + return false; + } + return true; } +const tflite::Model *TFLModel::get_model(void) { return tflite::GetModel(_data.data()); } + } // namespace tflite2circle diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt index 1cf7c0c45..50ee05242 100644 --- a/compiler/vconone/CMakeLists.txt +++ b/compiler/vconone/CMakeLists.txt @@ -1,5 +1,5 @@ if (NOT VCONONE_VERSION) - set(VCONONE_VERSION 0x0000000100110000) + set(VCONONE_VERSION 0x0000000000120001) # NOTE order is [build patch minor major] # if VCONONE_VERSION is set with -D option, it will be 
cached # you may have to remove cache file if you remove -D option diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 1a180a35b..e15dc2685 100644 --- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -83,10 +83,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map {"topkv2_find_first_negative", "topkv2.cl"}, {"topkv2_reorder_negatives", "topkv2.cl"}, {"topkv2_store", "topkv2.cl"}, - {"radixsort_histogram", "topkv2_radixsort.cl"}, - {"radixsort_scanhistograms", "topkv2_radixsort.cl"}, - {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, - {"radixsort_reorder", "topkv2_radixsort.cl"}, {"topkv2_quicksort", "topkv2_quicksort.cl"}, {"scale_factor_symm8", "scale_factor.cl"}, }; @@ -186,10 +182,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map #include "./cl_kernels/topkv2.clembed" }, { - "topkv2_radixsort.cl", -#include "./cl_kernels/topkv2_radixsort.clembed" - }, - { "topkv2_quicksort.cl", #include "./cl_kernels/topkv2_quicksort.clembed" }, diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl deleted file mode 100644 index e9d4696b4..000000000 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -// reference: -// https://code.google.com/archive/p/ocl-radix-sort/source/default/source -// OpenCL kernel sources for the CLRadixSort class -// the #include does not exist in OpenCL -// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr -// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html -// if you find this software usefull you can cite the following work in your reports or articles: -// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011. 
-// http://hal.archives-ouvertes.fr/hal-00596730 - -// Reference for floating point radix sort: -// http://www.codercorner.com/RadixSortRevisited.htm - -// compute the histogram for each radix and each virtual processor for the pass -__kernel void radixsort_histogram(__global float *in_key_buf, __global int *d_Histograms, - const int pass, __local int *loc_histo, const int n) -{ - int it = get_local_id(0); // i local number of the processor - int ig = get_global_id(0); // global number = i + g I - - int gr = get_group_id(0); // g group number - - int groups = get_num_groups(0); - int items = get_local_size(0); - - // set the local histograms to zero - for (int ir = 0; ir < _RADIX; ir++) - { - loc_histo[ir * items + it] = 0; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // range of keys that are analyzed by the work item - int size = n / groups / items; // size of the sub-list - int start = ig * size; // beginning of the sub-list - - unsigned int key; - int shortkey, k; - - // compute the index - // the computation depends on the transposition - for (int j = 0; j < size; j++) - { -#ifdef TRANSPOSE - k = groups * items * j + ig; -#else - k = j + start; -#endif - - key = *((__global unsigned int *)(in_key_buf + k)); - - // extract the group of _BITS bits of the pass - // the result is in the range 0.._RADIX-1 - shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1)); - - // increment the local histogram - loc_histo[shortkey * items + it]++; - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // copy the local histogram to the global one - for (int ir = 0; ir < _RADIX; ir++) - { - d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it]; - } - - barrier(CLK_GLOBAL_MEM_FENCE); -} - -// initial transpose of the list for improving -// coalescent memory access -__kernel void transpose(const __global int *invect, __global int *outvect, const int nbcol, - const int nbrow, const __global int *inperm, __global int *outperm, - __local int *blockmat, __local int *blockperm, 
const int tilesize) -{ - - int i0 = get_global_id(0) * tilesize; // first row index - int j = get_global_id(1); // column index - - int jloc = get_local_id(1); // local column index - - // fill the cache - for (int iloc = 0; iloc < tilesize; iloc++) - { - int k = (i0 + iloc) * nbcol + j; // position in the matrix - blockmat[iloc * tilesize + jloc] = invect[k]; -#ifdef PERMUT - blockperm[iloc * tilesize + jloc] = inperm[k]; -#endif - } - - barrier(CLK_LOCAL_MEM_FENCE); - - // first row index in the transpose - int j0 = get_group_id(1) * tilesize; - - // put the cache at the good place - for (int iloc = 0; iloc < tilesize; iloc++) - { - int kt = (j0 + iloc) * nbrow + i0 + jloc; // position in the transpose - outvect[kt] = blockmat[jloc * tilesize + iloc]; -#ifdef PERMUT - outperm[kt] = blockperm[jloc * tilesize + iloc]; -#endif - } -} - -// each virtual processor reorders its data using the scanned histogram -__kernel void radixsort_reorder(__global float *in_key, __global float *out_key, - __global int *d_Histograms, const int pass, - __global int *indices_in, __global int *indices_out, - __local int *loc_histo, const int n) -{ - - int it = get_local_id(0); - int ig = get_global_id(0); - - int gr = get_group_id(0); - int groups = get_num_groups(0); - int items = get_local_size(0); - - int start = ig * (n / groups / items); - int size = n / groups / items; - - // take the histogram in the cache - for (int ir = 0; ir < _RADIX; ir++) - { - loc_histo[ir * items + it] = d_Histograms[items * (ir * groups + gr) + it]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int newpos, shortkey, k, newpost; - unsigned int key; - - for (int j = 0; j < size; j++) - { -#ifdef TRANSPOSE - k = groups * items * j + ig; -#else - k = j + start; -#endif - float org_value = in_key[k]; - key = *(__global unsigned int *)(in_key + k); - shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1)); - - newpos = loc_histo[shortkey * items + it]; - -#ifdef TRANSPOSE - int ignew, jnew; - ignew = newpos / (n / 
groups / items); - jnew = newpos % (n / groups / items); - newpost = jnew * (groups * items) + ignew; -#else - newpost = newpos; -#endif - - // d_outKeys[newpost]= key; // killing line !!! - out_key[newpost] = org_value; - -#ifdef PERMUT - indices_out[newpost] = indices_in[k]; -#endif - - newpos++; - loc_histo[shortkey * items + it] = newpos; - } -} - -// perform a parallel prefix sum (a scan) on the local histograms -// (see Blelloch 1990) each workitem worries about two memories -// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html -__kernel void radixsort_scanhistograms(__global int *histo, __local int *temp, - __global int *globsum) -{ - int it = get_local_id(0); - int ig = get_global_id(0); - int decale = 1; - int n = get_local_size(0) * 2; - int gr = get_group_id(0); - - // load input into local memory - // up sweep phase - temp[2 * it] = histo[2 * ig]; - temp[2 * it + 1] = histo[2 * ig + 1]; - - // parallel prefix sum (algorithm of Blelloch 1990) - for (int d = n >> 1; d > 0; d >>= 1) - { - barrier(CLK_LOCAL_MEM_FENCE); - if (it < d) - { - int ai = decale * (2 * it + 1) - 1; - int bi = decale * (2 * it + 2) - 1; - temp[bi] += temp[ai]; - } - decale *= 2; - } - - // store the last element in the global sum vector - // (maybe used in the next step for constructing the global scan) - // clear the last element - if (it == 0) - { - globsum[gr] = temp[n - 1]; - temp[n - 1] = 0; - } - - // down sweep phase - for (int d = 1; d < n; d *= 2) - { - decale >>= 1; - barrier(CLK_LOCAL_MEM_FENCE); - - if (it < d) - { - int ai = decale * (2 * it + 1) - 1; - int bi = decale * (2 * it + 2) - 1; - - int t = temp[ai]; - temp[ai] = temp[bi]; - temp[bi] += t; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - // write results to device memory - - histo[2 * ig] = temp[2 * it]; - histo[2 * ig + 1] = temp[2 * it + 1]; - - barrier(CLK_GLOBAL_MEM_FENCE); -} - -// use the global sum for updating the local histograms -// each work item updates two values -__kernel void 
radixsort_pastehistograms(__global int *histo, __global int *globsum) -{ - int ig = get_global_id(0); - int gr = get_group_id(0); - - int s; - - s = globsum[gr]; - - // write results to device memory - histo[2 * ig] += s; - histo[2 * ig + 1] += s; - - barrier(CLK_GLOBAL_MEM_FENCE); -} diff --git a/docs/conf.py b/docs/conf.py index ea17db054..b59cab878 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors' author = 'Samsung Research & contributors' # The full version, including alpha/beta/rc tags -release = '1.17.0' +release = '1.18.0' # -- General configuration --------------------------------------------------- diff --git a/docs/release/1.18/index.rst b/docs/release/1.18/index.rst new file mode 100644 index 000000000..71c46585a --- /dev/null +++ b/docs/release/1.18/index.rst @@ -0,0 +1,13 @@ +.. ONE documentation master file, created by + sphinx-quickstart on Fri Oct 20 15:20:12 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +1.18 +==== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + ./release-note-1.18.0.md diff --git a/docs/release/1.18/release-note-1.18.0.md b/docs/release/1.18/release-note-1.18.0.md new file mode 100644 index 000000000..a10f10e37 --- /dev/null +++ b/docs/release/1.18/release-note-1.18.0.md @@ -0,0 +1,11 @@ +# Release Note 1.18.0 + +## ONE Compiler + +### Compiler Frontend + +- More optimization pass + - Fold DepthwiseConv2D + - Substitute SplitV to Split + - Expand BroadCast Const + - Force QuantParam diff --git a/infra/cmake/modules/ExternalSourceTools.cmake b/infra/cmake/modules/ExternalSourceTools.cmake index 0bfbaa33b..c8ca57520 100644 --- a/infra/cmake/modules/ExternalSourceTools.cmake +++ b/infra/cmake/modules/ExternalSourceTools.cmake @@ -103,7 +103,13 @@ function(ExternalSource_Download PREFIX) message(STATUS "Extract ${PREFIX}") execute_process(COMMAND ${CMAKE_COMMAND} -E tar xfz "${DOWNLOAD_PATH}" - WORKING_DIRECTORY "${TMP_DIR}") + WORKING_DIRECTORY "${TMP_DIR}" + ERROR_VARIABLE EXTRACTION_ERROR) + + if(EXTRACTION_ERROR) + message(FATAL_ERROR "Extract ${PREFIX} - failed") + endif(EXTRACTION_ERROR) + file(REMOVE "${DOWNLOAD_PATH}") message(STATUS "Extract ${PREFIX} - done") diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake new file mode 100644 index 000000000..b48239f2a --- /dev/null +++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake @@ -0,0 +1,13 @@ +function(_CMSISSource_import) + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz) + + ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}) + + set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE) + set(CMSISSource_FOUND TRUE PARENT_SCOPE) +endfunction(_CMSISSource_import) + +_CMSISSource_import() diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake 
b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake new file mode 100644 index 000000000..ca6f7826d --- /dev/null +++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "5.8.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake new file mode 100644 index 000000000..0eb8eb91c --- /dev/null +++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake @@ -0,0 +1,118 @@ +function(_FlatBuffers_import) + find_package(Flatbuffers QUIET) + set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE) +endfunction(_FlatBuffers_import) + +function(_FlatBuffers_build) + if(NOT BUILD_FLATBUFFERS) + message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF") + return() + endif(NOT BUILD_FLATBUFFERS) + + nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET) + + if(NOT FlatBuffersSource_FOUND) + # Source is not available + message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found") + return() + endif(NOT FlatBuffersSource_FOUND) + + set(ADDITIONAL_CXX_FLAGS "") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0) + set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess") + endif() + + nnas_include(ExternalBuildTools) + ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR} + BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build + INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10 + BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS} + IDENTIFIER "1.10-fix4" + EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF 
-DPOSITION_INDEPENDENT_CODE:BOOL=ON" + PKG_NAME "FLATBUFFERS-1.10") + +endfunction(_FlatBuffers_build) + +_FlatBuffers_build() +_FlatBuffers_import() + +if(FlatBuffers_FOUND) + if(NOT TARGET flatbuffers-1.10) + add_library(flatbuffers-1.10 INTERFACE) + target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers) + message(STATUS "Found FlatBuffers-1.10: TRUE") + endif(NOT TARGET flatbuffers-1.10) + + function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR) + get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE) + get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE) + + foreach(schema ${ARGN}) + get_filename_component(schema_fn "${schema}" NAME) + get_filename_component(dir "${schema}" DIRECTORY) + + get_filename_component(schema_fn_we "${schema_fn}" NAME_WE) + + list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}") + list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h") + endforeach() + + add_custom_command(OUTPUT ${OUTPUT_FILES} + COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}" + COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes + --no-union-value-namespacing + --gen-object-api -o "${abs_output_dir}" + ${SCHEMA_FILES} + DEPENDS flatbuffers::flatc) + + set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE) + set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE) + endfunction(FlatBuffers_Generate) + + function(FlatBuffers_Target TGT) + set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR) + set(multiValueArgs SCHEMA_FILES) + cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified + if(NOT ARG_INCLUDE_DIR) + set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR}) + endif(NOT ARG_INCLUDE_DIR) + + get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE) + get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE) + get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE) + + # Let's reset 
list variables before using them + # NOTE THIS DOES NOT AFFECT parent scope + unset(SCHEMA_FILES) + unset(OUTPUT_FILES) + + foreach(schema ${ARG_SCHEMA_FILES}) + get_filename_component(schema_fn "${schema}" NAME) + get_filename_component(dir "${schema}" DIRECTORY) + + get_filename_component(schema_fn_we "${schema_fn}" NAME_WE) + + list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}") + list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h") + endforeach() + + # Generate headers + add_custom_command(OUTPUT ${OUTPUT_FILES} + COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}" + COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes + --no-union-value-namespacing + --gen-object-api -o "${abs_output_dir}" + ${SCHEMA_FILES} + DEPENDS ${SCHEMA_FILES} + COMMENT "Generate '${TGT}' headers") + + # NOTE This header-only library is deliberately declared as STATIC library + # to avoid possible scope issues related with generated files + add_library(${TGT} STATIC ${OUTPUT_FILES}) + set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX) + target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}") + target_link_libraries(${TGT} PUBLIC flatbuffers-1.10) + endfunction(FlatBuffers_Target) +endif(FlatBuffers_FOUND) diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake new file mode 100644 index 000000000..6585f21d5 --- /dev/null +++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "1.10") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git 
a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake new file mode 100644 index 000000000..daa749c58 --- /dev/null +++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake @@ -0,0 +1,118 @@ +function(_FlatBuffers_import) + find_package(Flatbuffers QUIET) + set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE) +endfunction(_FlatBuffers_import) + +function(_FlatBuffers_build) + if(NOT BUILD_FLATBUFFERS) + message(STATUS "FlatBuffersConfig !BUILD_FLATBUFFERS") + return() + endif(NOT BUILD_FLATBUFFERS) + + nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET) + + if(NOT FlatBuffersSource_FOUND) + # Source is not available + message(STATUS "FlatBuffersConfig !FlatBuffersSource_FOUND") + return() + endif(NOT FlatBuffersSource_FOUND) + + set(ADDITIONAL_CXX_FLAGS "") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0) + set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess") + endif() + + nnas_include(ExternalBuildTools) + ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR} + BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build + INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12 + BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS} + IDENTIFIER "1.12-fix1" + EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON" + PKG_NAME "FLATBUFFERS-1.12") + +endfunction(_FlatBuffers_build) + +_FlatBuffers_build() +_FlatBuffers_import() + +if(FlatBuffers_FOUND) + if(NOT TARGET flatbuffers-1.12) + add_library(flatbuffers-1.12 INTERFACE) + target_link_libraries(flatbuffers-1.12 INTERFACE flatbuffers::flatbuffers) + message(STATUS "Found FlatBuffers-1.12: TRUE") + endif(NOT TARGET flatbuffers-1.12) + + function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR) + get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE) + get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE) + + foreach(schema ${ARGN}) + 
get_filename_component(schema_fn "${schema}" NAME) + get_filename_component(dir "${schema}" DIRECTORY) + + get_filename_component(schema_fn_we "${schema_fn}" NAME_WE) + + list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}") + list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h") + endforeach() + + add_custom_command(OUTPUT ${OUTPUT_FILES} + COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}" + COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes + --no-union-value-namespacing + --gen-object-api -o "${abs_output_dir}" + ${SCHEMA_FILES} + DEPENDS flatbuffers::flatc) + + set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE) + set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE) + endfunction(FlatBuffers_Generate) + + function(FlatBuffers_Target TGT) + set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR) + set(multiValueArgs SCHEMA_FILES) + cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified + if(NOT ARG_INCLUDE_DIR) + set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR}) + endif(NOT ARG_INCLUDE_DIR) + + get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE) + get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE) + get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE) + + # Let's reset list variables before using them + # NOTE THIS DOES NOT AFFECT parent scope + unset(SCHEMA_FILES) + unset(OUTPUT_FILES) + + foreach(schema ${ARG_SCHEMA_FILES}) + get_filename_component(schema_fn "${schema}" NAME) + get_filename_component(dir "${schema}" DIRECTORY) + + get_filename_component(schema_fn_we "${schema_fn}" NAME_WE) + + list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}") + list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h") + endforeach() + + # Generate headers + add_custom_command(OUTPUT ${OUTPUT_FILES} + COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}" + COMMAND 
"$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes + --no-union-value-namespacing + --gen-object-api -o "${abs_output_dir}" + ${SCHEMA_FILES} + DEPENDS ${SCHEMA_FILES} + COMMENT "Generate '${TGT}' headers") + + # NOTE This header-only library is deliberately declared as STATIC library + # to avoid possible scope issues related with generated files + add_library(${TGT} STATIC ${OUTPUT_FILES}) + set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX) + target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}") + target_link_libraries(${TGT} PUBLIC flatbuffers-1.12) + endfunction(FlatBuffers_Target) +endif(FlatBuffers_FOUND) diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake new file mode 100644 index 000000000..8cfdbf8e5 --- /dev/null +++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "1.12") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake index da084e7d3..e551e29c8 100644 --- a/infra/cmake/packages/FlatBuffersConfig.cmake +++ b/infra/cmake/packages/FlatBuffersConfig.cmake @@ -5,6 +5,7 @@ endfunction(_FlatBuffers_import) function(_FlatBuffers_build) if(NOT BUILD_FLATBUFFERS) + message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF") return() endif(NOT BUILD_FLATBUFFERS) @@ -12,6 +13,7 @@ function(_FlatBuffers_build) if(NOT FlatBuffersSource_FOUND) # Source is not available + message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found") return() endif(NOT FlatBuffersSource_FOUND) @@ -22,12 
+24,12 @@ function(_FlatBuffers_build) nnas_include(ExternalBuildTools) ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR} - BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS/build - INSTALL_DIR ${EXT_OVERLAY_DIR} + BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build + INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10 BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS} - IDENTIFIER "1.10-fix2" - EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF" - PKG_NAME "FLATBUFFERS") + IDENTIFIER "1.10-fix4" + EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON" + PKG_NAME "FLATBUFFERS-1.10") endfunction(_FlatBuffers_build) @@ -35,11 +37,11 @@ _FlatBuffers_build() _FlatBuffers_import() if(FlatBuffers_FOUND) - if(NOT TARGET flatbuffers) - add_library(flatbuffers INTERFACE) - target_link_libraries(flatbuffers INTERFACE flatbuffers::flatbuffers) - message(STATUS "Found FlatBuffers: TRUE") - endif(NOT TARGET flatbuffers) + if(NOT TARGET flatbuffers-1.10) + add_library(flatbuffers-1.10 INTERFACE) + target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers) + message(STATUS "Found FlatBuffers-1.10: TRUE") + endif(NOT TARGET flatbuffers-1.10) function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR) get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE) @@ -111,6 +113,6 @@ if(FlatBuffers_FOUND) add_library(${TGT} STATIC ${OUTPUT_FILES}) set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX) target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}") - target_link_libraries(${TGT} PUBLIC flatbuffers) + target_link_libraries(${TGT} PUBLIC flatbuffers-1.10) endfunction(FlatBuffers_Target) endif(FlatBuffers_FOUND) diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake new file mode 100644 index 000000000..8b1743066 --- /dev/null +++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake @@ -0,0 +1,13 @@ 
+function(_MbedOSSource_import) + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz) + + ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}) + + set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE) + set(MbedOSSource_FOUND TRUE PARENT_SCOPE) +endfunction(_MbedOSSource_import) + +_MbedOSSource_import() diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake new file mode 100644 index 000000000..acdd54ad6 --- /dev/null +++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "6.15") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake new file mode 100644 index 000000000..a9ec75d34 --- /dev/null +++ b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake @@ -0,0 +1,21 @@ +function(_TensorFlowEigenSource_import) + if(NOT DOWNLOAD_EIGEN) + set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_EIGEN) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + # Exact version used by TensorFlow v2.6.0. + # See tensorflow/third_party/eigen3/workspace.bzl. 
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com") + envoption(TENSORFLOW_2_6_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/12e8d57108c50d8a63605c6eb0144c838c128337/eigen-12e8d57108c50d8a63605c6eb0144c838c128337.tar.gz) + + ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.6.0-EIGEN ${TENSORFLOW_2_6_0_EIGEN_URL}) + + set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE) + set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE) +endfunction(_TensorFlowEigenSource_import) + +_TensorFlowEigenSource_import() diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake new file mode 100644 index 000000000..38ad0aa31 --- /dev/null +++ b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "2.6.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake new file mode 100644 index 000000000..b7f3148e8 --- /dev/null +++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake @@ -0,0 +1,20 @@ +function(_TensorFlowGEMMLowpSource_import) + if(NOT DOWNLOAD_GEMMLOWP) + set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_GEMMLOWP) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + # Exact version used by TensorFlow v2.6.0. + # See tensorflow/third_party/gemmlowp/workspace.bzl. 
+ envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip) + + ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL}) + + set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE) + set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE) +endfunction(_TensorFlowGEMMLowpSource_import) + +_TensorFlowGEMMLowpSource_import() diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake new file mode 100644 index 000000000..38ad0aa31 --- /dev/null +++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "2.6.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake new file mode 100644 index 000000000..b4dee914f --- /dev/null +++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake @@ -0,0 +1,20 @@ +function(_TensorFlowRuySource_import) + if(NOT DOWNLOAD_RUY) + set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_RUY) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + # Exact version used by TensorFlow v2.6.0. 
+ # See tensorflow/third_party/ruy/workspace.bzl + envoption(TENSORFLOW_2_6_0_RUY_URL https://github.com/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip) + + ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL}) + + set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE) + set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE) +endfunction(_TensorFlowRuySource_import) + +_TensorFlowRuySource_import() diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake new file mode 100644 index 000000000..38ad0aa31 --- /dev/null +++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "2.6.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake new file mode 100644 index 000000000..611c7c805 --- /dev/null +++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake @@ -0,0 +1,18 @@ +function(_TensorFlowSource_import) + if(NOT DOWNLOAD_TENSORFLOW) + set(TensorFlowSource_FOUND FALSE PARENT_SCOPE) + return() + endif(NOT DOWNLOAD_TENSORFLOW) + + nnas_include(ExternalSourceTools) + nnas_include(OptionTools) + + envoption(TENSORFLOW_2_6_0_URL https://github.com/tensorflow/tensorflow/archive/v2.6.0.tar.gz) + + ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL}) + + set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE) + 
set(TensorFlowSource_FOUND TRUE PARENT_SCOPE) +endfunction(_TensorFlowSource_import) + +_TensorFlowSource_import() diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake new file mode 100644 index 000000000..38ad0aa31 --- /dev/null +++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake @@ -0,0 +1,10 @@ +set(PACKAGE_VERSION "2.6.0") +set(PACKAGE_VERSION_EXACT FALSE) +set(PACKAGE_VERSION_COMPATIBLE FALSE) +set(PACKAGE_VERSION_UNSUITABLE TRUE) + +if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + set(PACKAGE_VERSION_COMPATIBLE TRUE) + set(PACKAGE_VERSION_UNSUITABLE FALSE) +endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog index 6859255ff..12af5f928 100644 --- a/infra/debian/compiler/changelog +++ b/infra/debian/compiler/changelog @@ -1,3 +1,9 @@ +one (1.18.0) bionic; urgency=medium + + * More optimization pass + + -- seongwoo <mhs4670go@naver.com> Fri, 15 Oct 2021 15:23:20 +0900 + one (1.17.0) bionic; urgency=medium * More optimization pass diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install index ba628545b..cbca47802 100644 --- a/infra/debian/compiler/one-compiler.install +++ b/infra/debian/compiler/one-compiler.install @@ -3,7 +3,6 @@ usr/bin/circle2circle usr/share/one/bin/ usr/bin/circle_partitioner usr/share/one/bin/ usr/bin/circle-quantizer usr/share/one/bin/ -usr/bin/conv_mixin_1.8.0.patch usr/share/one/bin/ usr/bin/generate_bcq_metadata.py usr/share/one/bin/ usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/ usr/bin/model2nnpkg.sh usr/share/one/bin/ diff --git a/infra/debian/compiler/one-compiler.links b/infra/debian/compiler/one-compiler.links index 8b6e542c1..9e464352a 100644 --- a/infra/debian/compiler/one-compiler.links +++ 
b/infra/debian/compiler/one-compiler.links @@ -13,4 +13,5 @@ usr/share/one/lib/libluci_log.so usr/lib/libluci_log.so usr/share/one/lib/libluci_partition.so usr/lib/libluci_partition.so usr/share/one/lib/libluci_pass.so usr/lib/libluci_pass.so usr/share/one/lib/libluci_profile.so usr/lib/libluci_profile.so +usr/share/one/lib/libluci_plan.so usr/lib/libluci_plan.so usr/share/one/lib/libluci_service.so usr/lib/libluci_service.so diff --git a/infra/debian/compiler/rules b/infra/debian/compiler/rules index 21b956b2f..e42faae09 100755 --- a/infra/debian/compiler/rules +++ b/infra/debian/compiler/rules @@ -1,7 +1,7 @@ #!/usr/bin/make -f export DH_VERBOSE = 1 export NNAS_BUILD_PREFIX = build -export PRESET = 20210706 +export PRESET = 20210910 export _DESTDIR = debian/tmp/usr %: diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog index 4a41d959c..ee0d3e6ee 100644 --- a/infra/debian/runtime/changelog +++ b/infra/debian/runtime/changelog @@ -1,3 +1,9 @@ +one (1.18.0) bionic; urgency=low + + * Synch up version with ONE Compiler + + -- Chunseok Lee <chunseok.lee@samsung.com> Fri, 15 Oct 2021 15:23:00 +0900 + one (1.17.0) bionic; urgency=low * New gpu_gl backend supports the following operations : Add, Convolution, Depthwise Convolution, Pooling, Reshape, Relu, Softmax diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt index eb279902e..bde684938 100644 --- a/infra/nncc/CMakeLists.txt +++ b/infra/nncc/CMakeLists.txt @@ -130,6 +130,11 @@ option(ENABLE_STRICT_BUILD "Treat warning as error" OFF) # Check our ProtobufConfig.cmake for its usage. option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF) +# This option might be turned ON for MCU builds of luci related components. +# It specify which library type to use for build: +# if set ON - luci libraries are static, otherwise - shared. 
+option(STATIC_LUCI "Build luci as a static libraries" OFF) + ### ### Target ### diff --git a/infra/nncc/command/utcount b/infra/nncc/command/utcount index 64aaace9b..65aea8bae 100644 --- a/infra/nncc/command/utcount +++ b/infra/nncc/command/utcount @@ -14,7 +14,7 @@ oops pepper-assert \ hermes hermes-std \ loco locop locomotiv logo-core logo \ foder souschef arser vconone crew \ -safemain mio-circle mio-tflite \ +safemain mio-circle mio-tflite mio-tflite260 \ tflite2circle \ luci \ luci-interpreter \ diff --git a/infra/packaging/build b/infra/packaging/build index 8d3230010..53d63713b 100644 --- a/infra/packaging/build +++ b/infra/packaging/build @@ -8,7 +8,7 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then fi # The default preset -PRESET="20210706" +PRESET="20210910" EXTRA_OPTIONS=() while [ "$#" -ne 0 ]; do diff --git a/infra/packaging/preset/20210910 b/infra/packaging/preset/20210910 new file mode 100644 index 000000000..d00b1ccad --- /dev/null +++ b/infra/packaging/preset/20210910 @@ -0,0 +1,55 @@ +#!/bin/bash + +# NOTE purpose of this file is static analysis only +# new official preset will be added when new programs are ready + +PRESET="20210910" + +function preset_configure() +{ + REQUIRED_UNITS=() + # Common Libraries + REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp") + REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew") + REQUIRED_UNITS+=("souschef") + REQUIRED_UNITS+=("safemain") + REQUIRED_UNITS+=("arser") + REQUIRED_UNITS+=("vconone") + # Hermes Logging Framework + REQUIRED_UNITS+=("hermes" "hermes-std") + # loco IR and related utilities + REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") + # Flatbuffer I/O + REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle") + # Circle compiler library (.circle -> .circle) + REQUIRED_UNITS+=("luci") + # Tools + REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef") + REQUIRED_UNITS+=("circle-tensordump" "circledump") + 
REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify") + REQUIRED_UNITS+=("luci-eval-driver") + REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5") + REQUIRED_UNITS+=("circle-partitioner") + REQUIRED_UNITS+=("one-cmds") + REQUIRED_UNITS+=("bcq-tools") + + NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)} + + # TODO Use "nncc configure" and "nncc build" + cmake \ + -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \ + -DCMAKE_BUILD_TYPE=release \ + -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \ + -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \ + ${EXTRA_OPTIONS[@]} \ + "${NNAS_PROJECT_PATH}/infra/nncc" +} + +function preset_install() +{ + install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \ + "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh" + + # Install tf2nnpkg + install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg" +} diff --git a/infra/packaging/preset/20210910_windows b/infra/packaging/preset/20210910_windows new file mode 100644 index 000000000..642bdbd76 --- /dev/null +++ b/infra/packaging/preset/20210910_windows @@ -0,0 +1,67 @@ +#!/bin/bash + +function preset_configure() +{ + REQUIRED_UNITS=() + # Common Libraries + REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp") + REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew") + REQUIRED_UNITS+=("souschef") + REQUIRED_UNITS+=("safemain") + REQUIRED_UNITS+=("arser") + REQUIRED_UNITS+=("vconone") + # Hermes Logging Framework + REQUIRED_UNITS+=("hermes" "hermes-std") + # loco IR and related utilities + REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") + # Flatbuffer I/O + REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle") + # Circle compiler library (.circle -> .circle) + REQUIRED_UNITS+=("luci") + # Tools + REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef") + REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify") + 
REQUIRED_UNITS+=("luci-eval-driver") + REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5") + REQUIRED_UNITS+=("circle-partitioner") + REQUIRED_UNITS+=("one-cmds") + REQUIRED_UNITS+=("bcq-tools") + + NPROC=$(cat /proc/cpuinfo | grep -c processor) + + # TODO Use "nncc configure" and "nncc build" + cmake \ + -G "MSYS Makefiles" \ + -DUSE_PROTOBUF_LEGACY_IMPORT=ON \ + -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \ + -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \ + -DENABLE_TEST=OFF \ + -DDOWNLOAD_GTEST=OFF \ + -DBUILD_GTEST=OFF \ + -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \ + -DCMAKE_BUILD_TYPE=release \ + -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \ + -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \ + ${EXTRA_OPTIONS[@]} \ + "${NNAS_PROJECT_PATH}/infra/nncc" +} + +function preset_install() +{ + # Install libraries to bin/ for Windows release + mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin + rm -rf ${NNCC_INSTALL_PREFIX}/lib + + install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \ + "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh" + + # Install tf2nnpkg + install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210910" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg" + + # Though you have to install tensorflow to run 'tf2tfliteV2', + # tensorflow can't be installed in mingw. First, You can install tensorflow + # from Window native CMD(run as administrator) with python virtual environment. + # And, you must copy it to "${NNAS_INSTALL_PREFIX}/bin/venv" +} diff --git a/infra/packaging/res/tf2nnpkg.20210910 b/infra/packaging/res/tf2nnpkg.20210910 new file mode 100644 index 000000000..0d44818a1 --- /dev/null +++ b/infra/packaging/res/tf2nnpkg.20210910 @@ -0,0 +1,109 @@ +#!/bin/bash + +set -e + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" + +command_exists() { + if [ "$#" -le 0 ]; then + return 1 + fi + command -v "$@" > /dev/null 2>&1 +} + +usage() +{ + echo "Convert TensorFlow model to nnpackage." + echo "Usage: tf2nnpkg" + echo " --info <path/to/info>" + echo " --graphdef <path/to/pb>" + echo " -o <path/to/nnpkg/directory>" + echo " --v2 (optional) Use TF 2.x interface" + exit 255 +} + +TF_INTERFACE="--v1" + +# Parse command-line arguments +# +while [ "$#" -ne 0 ]; do + CUR="$1" + + case $CUR in + '--help') + usage + ;; + '--info') + export INFO_FILE="$2" + shift 2 + ;; + '--graphdef') + export GRAPHDEF_FILE="$2" + shift 2 + ;; + '-o') + export OUTPUT_DIR="$2" + shift 2 + ;; + '--v2') + TF_INTERFACE="--v2" + shift + ;; + *) + echo "${CUR}" + shift + ;; + esac +done + +if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then + echo "pb is not found. Please check --graphdef is correct." + exit 2 +fi + +if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then + echo "info is not found. Please check --info is correct." + exit 2 +fi + +if [ -z ${OUTPUT_DIR} ]; then + echo "output directory is not specifed. Please check -o is correct.." 
+ exit 2 +fi + +FILE_BASE=$(basename ${GRAPHDEF_FILE}) +MODEL_NAME="${FILE_BASE%.*}" +TMPDIR=$(mktemp -d) +trap "{ rm -rf $TMPDIR; }" EXIT + +# activate python virtual environment +VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate" +VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate" + +if [ -e ${VIRTUALENV_LINUX} ]; then + source ${VIRTUALENV_LINUX} +elif [ -e ${VIRTUALENV_WINDOWS} ]; then + source ${VIRTUALENV_WINDOWS} +fi + +# parse inputs, outputs from info file +INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s) +OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s) + +INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':') + +ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} " +ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} " +ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle " +ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} " +ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} " +if [ ! 
-z ${INPUT_SHAPES} ]; then + ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} " +fi + +${ONE_IMPORT_BCQ_SCRIPT} + +# optimize +"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle" + +"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle" diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh index a63140eaf..e520dd381 100644 --- a/infra/scripts/compiler_modules.sh +++ b/infra/scripts/compiler_modules.sh @@ -8,7 +8,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec" DEBUG_BUILD_ITEMS+=";hermes;hermes-std" DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo" DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone" -DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite" +DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite;mio-tflite260" DEBUG_BUILD_ITEMS+=";tflite2circle" DEBUG_BUILD_ITEMS+=";luci" DEBUG_BUILD_ITEMS+=";luci-interpreter" diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh index 65963f4b8..475da6d06 100755 --- a/infra/scripts/docker_collect_nnpkg_resources.sh +++ b/infra/scripts/docker_collect_nnpkg_resources.sh @@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Circle compiler library (.circle -> .circle) REQUIRED_UNITS+=("luci") # Flatbuffer I/O -REQUIRED_UNITS+=("mio-tflite" "mio-circle") +REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle") # Tools REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter") REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify") diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec index 0d170e7ed..4133d7a06 100644 --- a/packaging/nnfw.spec +++ b/packaging/nnfw.spec @@ -1,9 +1,9 @@ Name: nnfw Summary: nnfw -Version: 1.17.0 +Version: 1.18.0 Release: 1 Group: Development -License: Apache-2.0 and MIT and BSD-2-Clause +License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0 Source0: 
%{name}-%{version}.tar.gz Source1: %{name}.manifest diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.recipe b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe new file mode 100644 index 000000000..0eafec931 --- /dev/null +++ b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe @@ -0,0 +1,68 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "relu" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operand { + name: "padding" + type: INT32 + shape { dim: 4 dim: 2 } + filler { + tag: "explicit" + arg: "0" arg: "0" + arg: "1" arg: "1" + arg: "1" arg: "1" + arg: "0" arg: "0" + } +} +operand { + name: "constant_values" + type: FLOAT32 + shape { dim: 1 } + filler { + tag: "explicit" + arg: "-100.00" + } +} +operand { + name: "padv2" + type: FLOAT32 + shape { dim: 1 dim: 5 dim: 5 dim: 2 } +} +operand { + name: "ofm" + type: FLOAT32 + shape { dim: 1 dim: 3 dim: 3 dim: 2 } +} +operation { + type: "ReLU" + input: "ifm" + output: "relu" +} +operation { + type: "PadV2" + input: "relu" + input: "padding" + input: "constant_values" + output: "padv2" +} +operation { + type: "MaxPool2D" + maxpool2d_options { + padding: VALID + stride_w: 1 + stride_h: 1 + filter_height: 3 + filter_width: 3 + } + input: "padv2" + output: "ofm" +} + +input: "ifm" +output: "ofm" diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.rule b/res/TensorFlowLiteRecipes/PadV2_001/test.rule new file mode 100644 index 000000000..29b080b1e --- /dev/null +++ b/res/TensorFlowLiteRecipes/PadV2_001/test.rule @@ -0,0 +1,8 @@ +# To check if PadV2 is converted to Pad + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "PAD_EXIST" $(op_count PAD) '=' 1 +RULE "MAXPOOL2D_EXIST" $(op_count MAX_POOL_2D) '=' 1 +RULE "RELU_EXIST" $(op_count RELU) '=' 1 +RULE "NO_PADV2" $(op_count PADV2) '=' 0 diff --git a/res/TensorFlowLiteSchema/2.6.0/schema.fbs b/res/TensorFlowLiteSchema/2.6.0/schema.fbs new file mode 100644 index 000000000..6fc51f838 --- 
/dev/null +++ b/res/TensorFlowLiteSchema/2.6.0/schema.fbs @@ -0,0 +1,1240 @@ +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. +// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers. +// Version 3a: Add new builtin op code field. Has backward compatibility with +// version 3. + +namespace tflite; + +// This corresponds to the version. +file_identifier "TFL3"; +// File extension of any written files. +file_extension "tflite"; + +// IMPORTANT: All new members of tables, enums and unions must be added at the +// end to ensure backwards compatibility. + +// The type of data stored in a tensor. +enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, + BOOL = 6, + INT16 = 7, + COMPLEX64 = 8, + INT8 = 9, + FLOAT64 = 10, + COMPLEX128 = 11, + UINT64 = 12, + // Experimental: Resource and variant types are experimental, that are subject + // to change. Do not implement custom kernels using resource & variant types + // now. + RESOURCE = 13, + VARIANT = 14, + UINT32 = 15, +} + +// Custom quantization parameters for experimenting with new quantization +// techniques. 
+table CustomQuantization { + custom:[ubyte] (force_align: 16); +} + +// Represents a specific quantization technique's parameters. +union QuantizationDetails { + CustomQuantization, +} + +// Parameters for converting a quantized tensor back to float. +table QuantizationParameters { + // These four parameters are the asymmetric linear quantization parameters. + // Given a quantized value q, the corresponding float value f should be: + // f = scale * (q - zero_point) + // For other quantization types, the QuantizationDetails below is used. + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; // For dequantizing the tensor's values. + zero_point:[long]; + + // If this is not none, the other quantization parameters (i.e. min, max, + // scale, zero_point fields above) are ignored and the value of the + // QuantizationDetails union should be used. + details:QuantizationDetails; + + // Specifies the dimension of the Tensor's shape that the scales and + // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1] + // with quantization params: + // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantized_dimension=1 + // will be quantized across the second dimension of t. + // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1 + // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2 + // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3 + quantized_dimension:int; +} + +// Sparse tensors. +// We use a modification of the TACO format. +// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf +// +// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1), +// potentially with a k-dimensional block (0 <= k <= n) with dims +// (dn, ..., dn+k-1), the format needs to specify: +// 1. In what order to traverse these dimensions. 
For example, to store a 2-D +// matrix in row major order, the traversal order would be (d0, d1), +// whereas to store it in column major order, the traversal order would be +// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order +// could be (d0, d1, d2, d3). +// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original +// tensor dimension in (d0, ..., dn-1). +// 3. In the traversal order defined above, the format (dense vs. sparse) and +// index metadata for each dimension. For a dense dimension, this is just +// the size of that dimension. For a sparse dimension, it's the same as +// the compressed index defined in the Compressed Sparse Row (CSR) format. +// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html) + +// The storage type for a dimension. Currently we support: +// 1. DENSE: each coordinate in this dimension is stored implicitly. +// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The +// compression technique is the same as what CSR uses. +// More types like a sparse dimension with a different compression technique +// could be added to the list in the future. +enum DimensionType : byte { + DENSE = 0, + SPARSE_CSR = 1, +} + +table Int32Vector { + values:[int]; +} + +table Uint16Vector { + values:[ushort] (force_align: 4); +} + +table Uint8Vector { + values:[ubyte] (force_align: 4); +} + +// Variable-typed buffer to store the index metadata for a sparse dimension. +// The widest type is Int32 instead of UInt32 because tensor's shape is an int32 +// vector. We don't want the per-dimensional index to overflow that range. +union SparseIndexVector { + Int32Vector, + Uint16Vector, + Uint8Vector +} + +table DimensionMetadata { + // Whether a dimension is dense or sparse. + format:DimensionType; + // Index metadata used for a dimension. + // - If format is DimensionType.DENSE then we use the dense_size field to + // store the size of that dimension. 
Each index in that dimension is + // stored implicitly. + // - If format is DimensionType.SPARSE_CSR then we use array_segments and + // array_indices to encode that dimension. array_segments represents how + // to segment the indices array, each segment corresponds to one element + // in the previous dimension. array_indices represents the index of the + // non-zero elements within this dimension (as those in the CSR matrix + // format, where the first array is row pointers and the second array is + // column indices). + dense_size:int; + array_segments:SparseIndexVector; + array_indices:SparseIndexVector; +} + +// Parameters to encode a sparse TfLite tensor. +table SparsityParameters { + // The traversal order of the dimensions defined in the `shape` field of the + // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1, + // ..., dn-1), + // - if not block sparse, the traversal_order is just a permutation of (d0, + // ..., dn-1). For example, a 2-D matrix stored in row-major order would + // have traversal_order = (d0, d1). + // - if block sparse with a k-dimensional block (0 <= k <= n), the + // traversal_order has n + k elements. The first n elements are still a + // permutation of (d0, ..., dn-1). The last k elements are a permutation + // of (dn, ..., dn+k-1), defining how to traverse a block internally. For + // example, a 2-D matrix with 2-D blocks, both stored in row-major order + // would have traversal_order = (d0, d1, d2, d3). + traversal_order:[int]; + // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n), + // stores how a block dimension in (dn, ..., dn+k-1) maps to the original + // tensor dimension in (d0, ..., dn-1). + // It's stored in the order of (dn, ..., dn+k-1). + // If not block-sparse, this field is NULL. + block_map:[int]; + // In the traversal order defined above, the metadata needed for + // each dimension to locate the non-zero values in the original dense tensor. 
+ // The size of the dim_metadata array = the size of the traversal_order array + // = n + k. + dim_metadata:[DimensionMetadata]; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, height, width, number of channels] (That's + // Tensorflow's NHWC). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existent empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. + + is_variable:bool = false; + + // Parameters to encode a sparse tensor. See the example in + // tensorflow/lite/testdata/sparse_tensor.json. + sparsity:SparsityParameters; // Optional. + + // Encodes `shape` with unknown dimensions. Unknown dimensions are + // represented with -1. + shape_signature:[int]; // Optional. +} + +// A list of builtin operators. Builtin operators are slightly faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. 
+// LINT.IfChange +enum BuiltinOperator : int32 { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + DEPTH_TO_SPACE = 5, + DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed + // since different model developers use RELU1 in different ways. Never + // create another op called RELU1. + RELU_N1_TO_1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, + PAD = 34, + UNIDIRECTIONAL_SEQUENCE_RNN = 35, + GATHER = 36, + BATCH_TO_SPACE_ND = 37, + SPACE_TO_BATCH_ND = 38, + TRANSPOSE = 39, + MEAN = 40, + SUB = 41, + DIV = 42, + SQUEEZE = 43, + UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + STRIDED_SLICE = 45, + BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, + TOPK_V2 = 48, + SPLIT = 49, + LOG_SOFTMAX = 50, + // DELEGATE is a special op type for the operations which are delegated to + // other backends. 
+ // WARNING: Experimental interface, subject to change + DELEGATE = 51, + BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, + PRELU = 54, + MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, + NEG = 59, + PADV2 = 60, + GREATER = 61, + GREATER_EQUAL = 62, + LESS_EQUAL = 63, + SELECT = 64, + SLICE = 65, + SIN = 66, + TRANSPOSE_CONV = 67, + SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, + EQUAL = 71, + NOT_EQUAL = 72, + LOG = 73, + SUM = 74, + SQRT = 75, + RSQRT = 76, + SHAPE = 77, + POW = 78, + ARG_MIN = 79, + FAKE_QUANT = 80, + REDUCE_PROD = 81, + REDUCE_MAX = 82, + PACK = 83, + LOGICAL_OR = 84, + ONE_HOT = 85, + LOGICAL_AND = 86, + LOGICAL_NOT = 87, + UNPACK = 88, + REDUCE_MIN = 89, + FLOOR_DIV = 90, + REDUCE_ANY = 91, + SQUARE = 92, + ZEROS_LIKE = 93, + FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, + RESIZE_NEAREST_NEIGHBOR = 97, + LEAKY_RELU = 98, + SQUARED_DIFFERENCE = 99, + MIRROR_PAD = 100, + ABS = 101, + SPLIT_V = 102, + UNIQUE = 103, + CEIL = 104, + REVERSE_V2 = 105, + ADD_N = 106, + GATHER_ND = 107, + COS = 108, + WHERE = 109, + RANK = 110, + ELU = 111, + REVERSE_SEQUENCE = 112, + MATRIX_DIAG = 113, + QUANTIZE = 114, + MATRIX_SET_DIAG = 115, + ROUND = 116, + HARD_SWISH = 117, + IF = 118, + WHILE = 119, + NON_MAX_SUPPRESSION_V4 = 120, + NON_MAX_SUPPRESSION_V5 = 121, + SCATTER_ND = 122, + SELECT_V2 = 123, + DENSIFY = 124, + SEGMENT_SUM = 125, + BATCH_MATMUL = 126, + PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + CUMSUM = 128, + CALL_ONCE = 129, + BROADCAST_TO = 130, + RFFT2D = 131, + CONV_3D = 132, + IMAG=133, + REAL=134, + COMPLEX_ABS=135, + HASHTABLE = 136, + HASHTABLE_FIND = 137, + HASHTABLE_IMPORT = 138, + HASHTABLE_SIZE = 139, + REDUCE_ALL = 140, + CONV_3D_TRANSPOSE = 141, + VAR_HANDLE = 142, + READ_VARIABLE = 143, + ASSIGN_VARIABLE = 144, +} +// LINT.ThenChange(nnapi_linter/linter.proto) + +// Options for the builtin operators. 
+union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, + PadOptions, + GatherOptions, + BatchToSpaceNDOptions, + SpaceToBatchNDOptions, + TransposeOptions, + ReducerOptions, + SubOptions, + DivOptions, + SqueezeOptions, + SequenceRNNOptions, + StridedSliceOptions, + ExpOptions, + TopKV2Options, + SplitOptions, + LogSoftmaxOptions, + CastOptions, + DequantizeOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, + NegOptions, + PadV2Options, + GreaterOptions, + GreaterEqualOptions, + LessEqualOptions, + SelectOptions, + SliceOptions, + TransposeConvOptions, + SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, + EqualOptions, + NotEqualOptions, + ShapeOptions, + PowOptions, + ArgMinOptions, + FakeQuantOptions, + PackOptions, + LogicalOrOptions, + OneHotOptions, + LogicalAndOptions, + LogicalNotOptions, + UnpackOptions, + FloorDivOptions, + SquareOptions, + ZerosLikeOptions, + FillOptions, + BidirectionalSequenceLSTMOptions, + BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, + ResizeNearestNeighborOptions, + LeakyReluOptions, + SquaredDifferenceOptions, + MirrorPadOptions, + AbsOptions, + SplitVOptions, + UniqueOptions, + ReverseV2Options, + AddNOptions, + GatherNdOptions, + CosOptions, + WhereOptions, + RankOptions, + ReverseSequenceOptions, + MatrixDiagOptions, + QuantizeOptions, + MatrixSetDiagOptions, + HardSwishOptions, + IfOptions, + WhileOptions, + DepthToSpaceOptions, + NonMaxSuppressionV4Options, + NonMaxSuppressionV5Options, + ScatterNdOptions, + SelectV2Options, + DensifyOptions, + 
SegmentSumOptions, + BatchMatMulOptions, + CumsumOptions, + CallOnceOptions, + BroadcastToOptions, + Rfft2dOptions, + Conv3DOptions, + HashtableOptions, + HashtableFindOptions, + HashtableImportOptions, + HashtableSizeOptions, + VarHandleOptions, + ReadVariableOptions, + AssignVariableOptions, +} + +enum Padding : byte { SAME, VALID } + +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU_N1_TO_1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +// Options for both Conv3D and Conv3DTranspose. +table Conv3DOptions { + padding:Padding; + stride_d:int; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_d_factor:int = 1; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + // Parameters for DepthwiseConv version 1 or above. + padding:Padding; + stride_w:int; + stride_h:int; + // `depth_multiplier` is redundant. It's used by CPU kernels in + // TensorFlow 2.0 or below, but ignored in versions above. + // See comments in lite/c/builtin_op_data.h for more details. + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; + // Parameters for DepthwiseConv version 2 or above. + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. 
+} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; + // For weights-only quantization, use asymmetric quantization for non + // constant inputs at evaluation time. + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow dynamic_rnn with RNNCell. +table SequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell. +table BidirectionalSequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + merge_outputs: bool; + asymmetric_quantize_inputs:bool; +} + +enum FullyConnectedOptionsWeightsFormat: byte { + DEFAULT = 0, + SHUFFLED4x16INT8 = 1, +} + +// An implementation of TensorFlow fully_connected (a.k.a. Dense) layer. +table FullyConnectedOptions { + // Parameters for FullyConnected version 1 or above. + fused_activation_function:ActivationFunctionType; + + // Parameters for FullyConnected version 2 or above. + weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT; + + // Parameters for FullyConnected version 5 or above. + // If set to true, then the number of dimensions is preserved. Furthermore, + // all but the last dimension of the input and output shapes will be equal. + keep_num_dims: bool; + + // Parameters for FullyConnected version 7 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. 
+table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 3. + pot_scale_int16:bool = true; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + // This field is currently ignored in the L2 Norm Op. + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. + BASIC = 1, +} + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + // Parameters for LSTM version 1 or above. + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above. + kernel_type: LSTMKernelType = FULL; + + // Parameters for LSTM version 4 or above. + asymmetric_quantize_inputs: bool; +} + +// An implementation of TensorFlow dynamic_rnn with LSTMCell. +table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true then first dimension is sequence, otherwise batch. + time_major:bool; + + // Parameter for Unidirectional Sequence LSTM version 4. 
+ asymmetric_quantize_inputs:bool; +} + +table BidirectionalSequenceLSTMOptions { + // Parameters supported by version 1: + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true, store the outputs of both directions into the first output. + merge_outputs: bool; + + // Parameters supported by version 2: + // If true then first dimension is sequence, otherwise batch. + // Version 1 implementations assumed time_major to be true, so this default + // value should never change. + time_major: bool = true; + + // Parameters for version 3 or above. + asymmetric_quantize_inputs:bool; +} + +table ResizeBilinearOptions { + new_height: int (deprecated); + new_width: int (deprecated); + align_corners: bool; + half_pixel_centers: bool; +} + +table ResizeNearestNeighborOptions { + align_corners: bool; + half_pixel_centers: bool; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table PadOptions { +} + +table PadV2Options { +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SpaceToBatchNDOptions { +} + +table BatchToSpaceNDOptions { +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +table DepthToSpaceOptions { + block_size: int; +} + +table SubOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; +} + +table DivOptions { + fused_activation_function:ActivationFunctionType; +} + +table TopKV2Options { +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +table GatherOptions { + axis: int; + // Parameters for Gather version 5 or above. 
+ batch_dims: int = 0; +} + +table TransposeOptions { +} + +table ExpOptions { +} + +table CosOptions { +} + +table ReducerOptions { + keep_dims: bool; +} + +table SqueezeOptions { + squeeze_dims:[int]; +} + +table SplitOptions { + num_splits: int; +} + +table SplitVOptions { + num_splits: int; +} + +table StridedSliceOptions { + begin_mask: int; + end_mask: int; + ellipsis_mask: int; + new_axis_mask: int; + shrink_axis_mask: int; +} + +table LogSoftmaxOptions { +} + +table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; +} + +table DequantizeOptions { +} + +table MaximumMinimumOptions { +} + +table TileOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table ArgMinOptions { + output_type : TensorType; +} + +table GreaterOptions { +} + +table GreaterEqualOptions { +} + +table LessOptions { +} + +table LessEqualOptions { +} + +table NegOptions { +} + +table SelectOptions { +} + +table SliceOptions { +} + +table TransposeConvOptions { + padding:Padding; + stride_w:int; + stride_h:int; +} + +table ExpandDimsOptions { +} + +table SparseToDenseOptions { + validate_indices:bool; +} + +table EqualOptions { +} + +table NotEqualOptions { +} + +table ShapeOptions { + // Optional output type of the operation (int32 or int64). Defaults to int32. 
+ out_type : TensorType; +} + +table RankOptions { +} + +table PowOptions { +} + +table FakeQuantOptions { + // Parameters supported by version 1: + min:float; + max:float; + num_bits:int; + + // Parameters supported by version 2: + narrow_range:bool; +} + +table PackOptions { + values_count:int; + axis:int; +} + +table LogicalOrOptions { +} + +table OneHotOptions { + axis:int; +} + +table AbsOptions { +} + + +table HardSwishOptions { +} + +table LogicalAndOptions { +} + +table LogicalNotOptions { +} + +table UnpackOptions { + num:int; + axis:int; +} + +table FloorDivOptions { +} + +table SquareOptions { +} + +table ZerosLikeOptions { +} + +table FillOptions { +} + +table FloorModOptions { +} + +table RangeOptions { +} + +table LeakyReluOptions { + alpha:float; +} + +table SquaredDifferenceOptions { +} + +enum MirrorPadMode : byte { + // Doesn't include borders. + REFLECT = 0, + // Includes borders. + SYMMETRIC = 1, +} + +table MirrorPadOptions { + mode:MirrorPadMode; +} + +table UniqueOptions { + idx_out_type:TensorType = INT32; +} + +table ReverseV2Options { +} + +table AddNOptions { +} + +table GatherNdOptions { +} + +table WhereOptions { +} + +table ReverseSequenceOptions { + seq_dim:int; + batch_dim:int = 0; +} + +table MatrixDiagOptions { +} + +table QuantizeOptions { +} + +table MatrixSetDiagOptions { +} + +table IfOptions { + then_subgraph_index:int; + else_subgraph_index:int; +} + +table CallOnceOptions { + init_subgraph_index:int; +} + +table WhileOptions { + cond_subgraph_index:int; + body_subgraph_index:int; +} + +table NonMaxSuppressionV4Options { +} + +table NonMaxSuppressionV5Options { +} + +table ScatterNdOptions { +} + +table SelectV2Options { +} + +table DensifyOptions { +} + +table SegmentSumOptions { +} + +table BatchMatMulOptions { + adj_x:bool; + adj_y:bool; + // Parameters for BatchMatMul version 4 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. 
+ asymmetric_quantize_inputs: bool; +} + +table CumsumOptions { + exclusive:bool; + reverse:bool; +} + +table BroadcastToOptions { +} + +table Rfft2dOptions { +} + +table HashtableOptions { + // The identity of hash tables. This identity will be used across different + // subgraphs in the same interpreter instance. + table_id:int; + key_dtype:TensorType; + value_dtype:TensorType; +} + +table HashtableFindOptions { +} + +table HashtableImportOptions { +} + +table HashtableSizeOptions { +} + +table VarHandleOptions { + container:string; + shared_name:string; +} + +table ReadVariableOptions { +} + +table AssignVariableOptions { +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + // This field is for backward compatibility. This field will be used when + // the value of the extended builtin_code field is less than + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + deprecated_builtin_code:byte; + custom_code:string; + + // The version of the operator. The version needs to be bumped whenever new + // parameters are introduced into an op. + version:int = 1; + + // This field is introduced for resolving op builtin code shortage problem + // (the original BuiltinOperator enum field was represented as a byte). + // This field will be used when the value of the extended builtin_code field + // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + builtin_code:BuiltinOperator; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operation are configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicated map lookups. 
+ opcode_index:uint; + + // Optional input are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; + + // A list of booleans indicating the input tensors which are being mutated by + // this operator.(e.g. used by RNN and LSTM). + // For example, if the "inputs" array refers to 5 tensors and the second and + // fifth are mutable variables, then this list will contain + // [false, true, false, false, true]. + // + // If the list is empty, no variable is mutated in this operator. + // The list either has the same length as `inputs`, or is empty. + mutating_variable_inputs:[bool]; + + // A list of indices to the subgraph's "tensors" that are internal to an Op. + // Internal tensors are those that do not flow in or out of the operation, + // but instead are part of internal computation. As such, the operation's + // implementation may manage its memory more efficiently. They are needed + // however (i.e. not just an implementation detail) since they are part of the + // computation, which may require relevant metadata such as quantization + // parameters. + intermediates:[int]; +} + +// The root type, defining a subgraph, which typically represents an entire +// model. +table SubGraph { + // A list of all tensors used in this subgraph. + tensors:[Tensor]; + + // Indices of the tensors that are inputs into this subgraph. Note this is + // the list of non-static tensors that feed into the subgraph for inference. + inputs:[int]; + + // Indices of the tensors that are outputs out of this subgraph. Note this is + // the list of output tensors that are considered the product of the + // subgraph's inference. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of this subgraph (used for debugging). + name:string; +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. 
The generous alignment accommodates mmap-friendly data structures. +table Buffer { + data:[ubyte] (force_align: 16); +} + +table Metadata { + // A human readable string to uniquely identify a Metadata. + name:string; + // An index to the buffers table. + buffer:uint; +} + +// Map from an alias name of tensor to tensor index in the graph. +// This is used in Signature def. +table TensorMap { + // Represents the alias to use for this tensor. + name:string; + + // The actual tensor index in the primary graph, that 'name' corresponds to. + tensor_index:uint; +} + +// This corresponds to SignatureDef in Tensorflow SavedModel. +// The SignatureDef will be part of the SavedModel provided for conversion. +table SignatureDef { + // Named inputs for this signature. + inputs:[TensorMap]; + + // Named outputs for this signature. + outputs:[TensorMap]; + + // Exported method name for this signature. + method_name:string; + + // Key value which was in the Tensorflow SavedModel SignatureDef map. + key:string; + + // Subgraph index of the exported method. + subgraph_index:uint; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model. + // Note the 0th entry of this array must be an empty buffer (sentinel). + // This is a convention so that tensors without a buffer can provide 0 as + // their buffer. + buffers:[Buffer]; + + // Metadata about the model. Indirects into the existings buffers list. + // Deprecated, prefer to use metadata field. + metadata_buffer:[int]; + + // Metadata about the model. + metadata:[Metadata]; + + // Optional SignatureDefs for the model. 
+ signature_defs:[SignatureDef]; +} + +root_type Model; diff --git a/res/TensorFlowLiteSchema/SCHEMA.lst b/res/TensorFlowLiteSchema/SCHEMA.lst index 73dfacd7b..609ef4b0b 100644 --- a/res/TensorFlowLiteSchema/SCHEMA.lst +++ b/res/TensorFlowLiteSchema/SCHEMA.lst @@ -6,3 +6,4 @@ VERSION,URL 2.2.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.2.0/tensorflow/lite/schema/schema.fbs 2.3.0-rc0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0-rc0/tensorflow/lite/schema/schema.fbs 2.3.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0/tensorflow/lite/schema/schema.fbs +2.6.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.6.0/tensorflow/lite/schema/schema.fbs diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle index 2e3955c3a..b432929b5 100644 --- a/runtime/contrib/android/api/build.gradle +++ b/runtime/contrib/android/api/build.gradle @@ -8,7 +8,7 @@ android { minSdkVersion 26 targetSdkVersion 29 versionCode 1 - versionName "1.17.0" + versionName "1.18.0" externalNativeBuild { ndkBuild { diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt new file mode 100644 index 000000000..f88f13186 --- /dev/null +++ b/runtime/libs/ndarray/CMakeLists.txt @@ -0,0 +1,23 @@ +add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp) + +set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON) + +target_include_directories(ndarray PUBLIC include) +#can't make this private because of c++ templates +target_include_directories(ndarray PUBLIC src) + +option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types") + +if(${NDARRAY_INLINE_TEMPLATES}) + target_compile_definitions(ndarray PUBLIC -DNDARRAY_INLINE_TEMPLATES=1) +endif() + +target_link_libraries(ndarray PRIVATE nnfw_common) +target_link_libraries(ndarray PRIVATE nnfw_coverage) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +add_subdirectory(test) 
+add_subdirectory(example) diff --git a/runtime/libs/ndarray/example/CMakeLists.txt b/runtime/libs/ndarray/example/CMakeLists.txt new file mode 100644 index 000000000..c4b575dad --- /dev/null +++ b/runtime/libs/ndarray/example/CMakeLists.txt @@ -0,0 +1,4 @@ +add_executable(example_no_array example_no_array.cpp) + +add_executable(example_array example_array.cpp) +target_link_libraries(example_array PRIVATE ndarray) diff --git a/runtime/libs/ndarray/example/example_array.cpp b/runtime/libs/ndarray/example/example_array.cpp new file mode 100644 index 000000000..85d274681 --- /dev/null +++ b/runtime/libs/ndarray/example/example_array.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ndarray/Array.h" + +#include <iostream> +#include <iterator> + +using namespace ndarray; + +void gather_array(const Array<float> &input, Array<float> &output, const Array<int> &indices) +{ + assert(indices.shape().rank() == 3); + assert(input.shape().rank() == 3); + assert(indices.shape().dim(1) == input.shape().rank()); + + for (size_t i = 0; i < indices.shape().dim(0); ++i) + { + for (size_t j = 0; j < indices.shape().dim(1); ++j) + { + auto index = indices.slice(i, j); + output.slice(i, j).assign(input.slice(index[0], index[1])); + } + } +} + +int main() +{ + // fill tensor of shape[3,3,4] with sequential numbers from [0..36) + Shape in_shape{3, 3, 4}; + std::vector<float> input_data(in_shape.element_count()); + for (size_t i = 0; i < in_shape.element_count(); ++i) + input_data[i] = i; + + Array<float> input(input_data.data(), in_shape); + + // select column-vectors on main diagonal + Shape indices_shape{1, 3, 2}; + std::vector<int> indices_data(indices_shape.element_count()); + Array<int> indices(indices_data.data(), indices_shape); + + indices.slice(0, 0) = {0, 0}; + indices.slice(0, 1) = {1, 1}; + indices.slice(0, 2) = {2, 2}; + + Shape output_shape{1, 3, 4}; + std::vector<float> output_data(output_shape.element_count()); + + Array<float> output(output_data.data(), output_shape); + + gather_array(input, output, indices); + + for (size_t i = 0; i < indices_shape.dim(0); ++i) + { + for (size_t j = 0; j < indices_shape.dim(1); ++j) + { + auto output_piece = output.slice(i, j); + std::ostream_iterator<int> cout_it(std::cout, ", "); + std::copy(output_piece.begin(), output_piece.end(), cout_it); + std::cout << std::endl; + } + } +} diff --git a/runtime/libs/ndarray/example/example_no_array.cpp b/runtime/libs/ndarray/example/example_no_array.cpp new file mode 100644 index 000000000..3a4d05dca --- /dev/null +++ b/runtime/libs/ndarray/example/example_no_array.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <array> +#include <vector> +#include <algorithm> +#include <cassert> +#include <iostream> + +void gather_no_array(const float *in_data, const std::array<size_t, 3> &dims, float *out_data, + const std::array<size_t, 3> &out_dims, //[nselections, + const int *indices, const std::array<size_t, 3> &indices_dims) +{ + assert(indices_dims[1] == dims.size()); + + for (int i = 0; i < indices_dims[0]; ++i) + { + for (int j = 0; j < indices_dims[1]; ++j) + { + const int *index_ptr = indices + i * indices_dims[2] * indices_dims[1] + j * indices_dims[2]; + + size_t in_offset = index_ptr[0] * dims[2] * dims[1] + index_ptr[1] * dims[2]; + + const float *in_ptr = in_data + in_offset; + + size_t out_offset = i * out_dims[2] * out_dims[1] + j * out_dims[2]; + + float *out_ptr = out_data + out_offset; + + for (int k = 0; k < dims[2]; ++k) + { + out_ptr[k] = in_ptr[k]; + } + } + } +} + +int main() +{ + std::array<size_t, 3> in_dims{3, 3, 4}; + std::vector<float> input(3 * 3 * 4); + for (size_t i = 0; i < 3 * 3 * 4; ++i) + input[i] = i; + + std::array<size_t, 3> indices_shape{1, 3, 2}; + std::vector<int> indices(1 * 3 * 2); + + indices[0] = 0; + indices[1] = 0; + indices[2] = 1; + indices[3] = 1; + indices[4] = 2; + indices[5] = 2; + + std::array<size_t, 3> output_dims{1, 3, 4}; + std::vector<float> output(1 * 3 * 4); + + gather_no_array(input.data(), in_dims, output.data(), output_dims, 
indices.data(), indices_shape); + + for (size_t i = 0; i < output_dims[0]; ++i) + { + for (size_t j = 0; j < output_dims[1]; ++j) + { + auto out_ptr = output.data() + i * output_dims[1] * output_dims[2] + j * output_dims[2]; + for (size_t k = 0; k < output_dims[2]; ++k) + { + std::cout << out_ptr[k] << ", "; + } + std::cout << std::endl; + } + } +} diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h new file mode 100644 index 000000000..09e791763 --- /dev/null +++ b/runtime/libs/ndarray/include/ndarray/Array.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _NDARRAY_ARRAY_H_ +#define _NDARRAY_ARRAY_H_ + +#include "Common.h" + +#include "ContiguousSpan.h" +#include "Shape.h" + +#if __cplusplus < 201402L +#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions +#else +#include <utility> +#endif + +#include <algorithm> +#include <cassert> +#include <type_traits> +#include <array> +#include <tuple> +#include <cstddef> + +namespace ndarray +{ + +// there is no index_sequence before c++14 +#if __cplusplus < 201402L + +template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>; + +template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>; + +#else + +template <size_t... 
Nums> using index_sequence = std::index_sequence<Nums...>; + +template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>; + +#endif //__cplusplus < 201402L + +struct Strides +{ + explicit Strides(Shape s) : _strides{} { fillStrides(s); } + + int operator[](size_t idx) const noexcept { return _strides[idx]; } + + // since we don't have c++14 fold expression + template <typename Seq, typename... Ts> struct _calc_offset; + + template <size_t Num, size_t... Nums, typename T, typename... Ts> + struct _calc_offset<index_sequence<Num, Nums...>, T, Ts...> + { + static constexpr size_t get(const std::array<int, 8> &strides, int x, Ts... xs) + { + return _calc_offset<index_sequence<Nums...>, Ts...>::get(strides, xs...) + + x * std::get<Num>(strides); + } + }; + + template <size_t Num, typename T> struct _calc_offset<index_sequence<Num>, T> + { + static constexpr size_t get(const std::array<int, 8> &strides, int x) + { + return x * std::get<Num>(strides); + } + }; + + template <typename Seq, typename... Ts> constexpr size_t offset(Seq, Ts... x) const noexcept + { + // return ( 0 + ... + (std::get<Nums>(_strides) * x)); in c++14 + return _calc_offset<Seq, Ts...>::get(_strides, x...); + } + +private: + void fillStrides(const Shape &s) noexcept + { + int rank = s.rank(); + _strides[rank - 1] = 1; + for (int d = rank - 2; d >= 0; --d) + { + _strides[d] = _strides[d + 1] * s.dim(d + 1); + } + } + + std::array<int, NDARRAY_MAX_DIMENSION_COUNT> _strides; +}; + +template <typename T> class Array +{ +public: + Array(T *data, Shape shape) noexcept : _data(data), _shape(shape), _strides(shape) {} + + Array(const Array &) = delete; + + Array(Array &&a) noexcept : _data(a._data), _shape(a._shape), _strides(a._strides) + { + a._data = nullptr; + } + + template <typename... Ts> T &at(Ts... x) const noexcept { return _at(static_cast<size_t>(x)...); } + + /** + * @brief returns last dimension as ContigniousSpan + * @param x indices of slice to take. 
See tests for usage details + * @return slice at given position + */ + template <typename... Ts> ContiguousSpan<T, std::is_const<T>::value> slice(Ts... x) noexcept + { + assert(sizeof...(Ts) == _shape.rank() - 1); + return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)}; + } + + /** + * @brief returns last dimension as ContigniousSpan + * @param x indices of slice to take. See tests for usage details + * @return slice at given position + */ + template <typename... Ts> ContiguousSpan<T, true> slice(Ts... x) const noexcept + { + assert(sizeof...(Ts) == _shape.rank() - 1); + return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)}; + } + + ContiguousSpan<T, std::is_const<T>::value> flat() noexcept + { + return {_data, _shape.element_count()}; + } + + ContiguousSpan<T, true> flat() const noexcept { return {_data, _shape.element_count()}; } + + const Shape &shape() const noexcept { return _shape; } + +private: + template <typename... Ts> T &_at(Ts... x) const noexcept + { + assert(sizeof...(x) == _shape.rank()); + using Indices = make_index_sequence<sizeof...(Ts)>; + return _data[offset(Indices{}, x...)]; + } + + template <typename... Ts, size_t... Nums> + size_t offset(index_sequence<Nums...> seq, Ts... x) const noexcept + { + static_assert( + sizeof...(Ts) == sizeof...(Nums), + "Sanity check failed. 
Generated index sequence size is not equal to argument count"); + + return _strides.offset(seq, x...); + } + + T *_data; + Shape _shape; + Strides _strides; +}; + +template <typename To, typename From> Array<To> array_cast(Array<From> &&from, Shape newShape) +{ + assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count()); + return Array<To>(reinterpret_cast<To *>(from.flat().data()), newShape); +} + +template <typename To, typename From> +Array<const To> array_cast(const Array<From> &from, Shape newShape) +{ + assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count()); + return Array<To>(reinterpret_cast<const To *>(from.flat().data()), newShape); +} + +#ifndef NDARRAY_INLINE_TEMPLATES + +extern template class Array<float>; +extern template class Array<int32_t>; +extern template class Array<uint32_t>; +extern template class Array<uint8_t>; + +#endif // NDARRAY_INLINE_TEMPLATES + +} // namespace ndarray + +#endif //_NDARRAY_ARRAY_H_ diff --git a/runtime/libs/ndarray/include/ndarray/Common.h b/runtime/libs/ndarray/include/ndarray/Common.h new file mode 100644 index 000000000..aa0cc6fe2 --- /dev/null +++ b/runtime/libs/ndarray/include/ndarray/Common.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _NDARRAY_COMMON_H_ +#define _NDARRAY_COMMON_H_ + +#define NDARRAY_MAX_DIMENSION_COUNT 8 + +#endif //_NDARRAY_COMMON_H_ diff --git a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h new file mode 100644 index 000000000..b322b77db --- /dev/null +++ b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifndef _NDARRAY_CONTIGNIOUS_SPAN_H_
#define _NDARRAY_CONTIGNIOUS_SPAN_H_

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>
#include <type_traits>
#include <vector>

namespace ndarray
{

/**
 * @brief Non-owning, move-only view over a contiguous run of elements.
 *
 * @tparam T       element type
 * @tparam isConst when true the view exposes the elements as read-only
 *
 * The span never allocates or frees; the viewed storage must outlive it.
 */
template <typename T, bool isConst = false> class ContiguousSpan
{
public:
  using pointer_type = typename std::conditional<isConst, const T *, T *>::type;
  using reference_type = typename std::conditional<isConst, const T &, T &>::type;
  using iterator_type = pointer_type;

  /// View @p len elements starting at @p data.
  ContiguousSpan(pointer_type data, size_t len) noexcept : _data(data), _len(len) {}

  /**
   * @brief View the range [first, last) of a contiguous container.
   *
   * An empty range yields a null, zero-length span; `first` is only
   * dereferenced when the range is non-empty.
   */
  template <typename It>
  explicit ContiguousSpan(It first, It last) noexcept
    : _data(first == last ? nullptr : &*first), _len(std::distance(first, last))
  {
  }

  ContiguousSpan(const ContiguousSpan &) = delete;

  /// Transfers the view; the moved-from span becomes empty (null data, zero length).
  ContiguousSpan(ContiguousSpan &&s) noexcept : _data(s._data), _len(s._len)
  {
    s._data = nullptr;
    s._len = 0;
  }

  /// Read-only view of the same elements.
  operator ContiguousSpan<T, true>() const noexcept { return ContiguousSpan<T, true>{_data, _len}; }

  /// Unchecked element access.
  reference_type operator[](size_t idx) const noexcept { return _data[idx]; }

  /// Element access, bounds-checked in debug builds.
  reference_type at(size_t idx) const noexcept
  {
    assert(idx < _len);
    return _data[idx];
  }

  /// Sub-span that skips the first @p offset elements (offset <= size()).
  ContiguousSpan<T, isConst> offset(size_t offset) const
  {
    assert(offset <= _len);
    return {_data + offset, _len - offset};
  }

  /// Copy every element of the range-like @p f into this span, starting at begin().
  template <typename From, bool _ = isConst>
  typename std::enable_if<!_, void>::type assign(const From &f) noexcept
  {
    assignFrom(std::begin(f), std::end(f));
  }

  /// Copy the listed values into this span, starting at begin().
  template <typename U, bool _ = isConst>
  typename std::enable_if<!_, ContiguousSpan &>::type
  operator=(std::initializer_list<U> list) noexcept
  {
    assignFrom(std::begin(list), std::end(list));
    return *this;
  }

  /// Copy [first, last) into this span; the source must not be longer than size().
  template <typename It, bool _ = isConst>
  typename std::enable_if<!_, void>::type assignFrom(It first, It last) noexcept
  {
    // Writing past the viewed storage would corrupt memory the span does not own.
    assert(static_cast<size_t>(std::distance(first, last)) <= _len);
    std::copy(first, last, begin());
  }

  size_t size() const noexcept { return _len; }

  iterator_type begin() const noexcept { return iterator_type{_data}; }

  iterator_type end() const noexcept { return iterator_type{_data + _len}; }

  pointer_type data() const noexcept { return _data; }

private:
  pointer_type _data;
  size_t _len;
};

#ifndef NDARRAY_INLINE_TEMPLATES

// Instantiated once in ContiguousSpan.cpp; define NDARRAY_INLINE_TEMPLATES to
// instantiate in every translation unit instead.
extern template class ContiguousSpan<float, true>;
extern template class ContiguousSpan<float, false>;
extern template class ContiguousSpan<int32_t, true>;
extern template class ContiguousSpan<int32_t, false>;
extern template class ContiguousSpan<uint32_t, true>;
extern template class ContiguousSpan<uint32_t, false>;
extern template class ContiguousSpan<uint8_t, true>;
extern template class ContiguousSpan<uint8_t, false>;

#endif // NDARRAY_INLINE_TEMPLATES

} // namespace ndarray

#endif //_NDARRAY_CONTIGNIOUS_SPAN_H_
+ */ + +#ifndef _NDARRAY_SHAPE_H_ +#define _NDARRAY_SHAPE_H_ + +#include "Common.h" + +#include <array> +#include <cassert> +#include <cstddef> + +namespace ndarray +{ + +class Shape +{ +public: + //_dims{} here and later since array does not have std::initializer_list ctor + // and aggregate initialization is not allowed here + explicit Shape(size_t rank) noexcept : _dims{}, _rank(rank) + { + std::fill(_dims.begin(), _dims.end(), 0); + } + + Shape(std::initializer_list<size_t> list) noexcept : _dims{}, _rank(list.size()) + { + std::copy(list.begin(), list.end(), _dims.begin()); + } + + size_t dim(int i) const noexcept { return _dims.at(i); } + + size_t &dim(int i) noexcept { return _dims.at(i); } + + size_t element_count() const noexcept + { + uint32_t res = 1; + for (size_t i = 0; i < rank(); ++i) + res *= dim(i); + assert(res <= 0xffffffff); + return res; + } + + size_t rank() const noexcept { return _rank; } + +private: + std::array<size_t, NDARRAY_MAX_DIMENSION_COUNT> _dims; + size_t _rank; +}; + +} // namespace ndarray + +#endif //_NDARRAY_SHAPE_H_ diff --git a/runtime/libs/ndarray/src/Array.cpp b/runtime/libs/ndarray/src/Array.cpp new file mode 100644 index 000000000..f9c9de9d3 --- /dev/null +++ b/runtime/libs/ndarray/src/Array.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ndarray/Array.h" + +namespace ndarray +{ + +template class Array<float>; +template class Array<int32_t>; +template class Array<uint32_t>; +template class Array<uint8_t>; + +} // namespace ndarray diff --git a/runtime/libs/ndarray/src/ContiguousSpan.cpp b/runtime/libs/ndarray/src/ContiguousSpan.cpp new file mode 100644 index 000000000..e06cfc2a1 --- /dev/null +++ b/runtime/libs/ndarray/src/ContiguousSpan.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ndarray/ContiguousSpan.h" + +namespace ndarray +{ + +template class ContiguousSpan<float, true>; +template class ContiguousSpan<float, false>; +template class ContiguousSpan<int32_t, true>; +template class ContiguousSpan<int32_t, false>; +template class ContiguousSpan<uint32_t, true>; +template class ContiguousSpan<uint32_t, false>; +template class ContiguousSpan<uint8_t, true>; +template class ContiguousSpan<uint8_t, false>; + +} // namespace ndarray diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h new file mode 100644 index 000000000..8b78fb985 --- /dev/null +++ b/runtime/libs/ndarray/src/detail/cxx14.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _NDARRAY_CXX14_H_ +#define _NDARRAY_CXX14_H_ + +namespace ndarray +{ + +namespace cxx14 +{ + +template <size_t... Nums> struct index_sequence +{ + using value_type = size_t; + + static constexpr std::size_t size() noexcept { return sizeof...(Nums); } +}; + +namespace detail +{ + +template <size_t v, typename Seq> struct _append; + +template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>> +{ + using result = index_sequence<Nums..., v>; +}; + +template <size_t Len> struct make_index_sequence +{ + using result = + typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result; +}; + +template <> struct make_index_sequence<1> +{ + using result = index_sequence<0>; +}; + +template <> struct make_index_sequence<0> +{ + using result = index_sequence<>; +}; + +} // namespace detail + +template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result; + +} // namespace cxx14 + +} // namespace ndarray + +#endif //_NDARRAY_CXX14_H_ diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt new file mode 100644 index 000000000..be1ed6510 --- /dev/null +++ b/runtime/libs/ndarray/test/CMakeLists.txt @@ -0,0 +1,18 @@ +if(NOT TARGET ndarray) + return() +endif() + +add_executable(ndarray_test ndarray_test.cpp) + +target_link_libraries(ndarray_test PRIVATE ndarray) + 
+nnfw_find_package(GTest) +if(NOT GTest_FOUND) + message(STATUS "GTest not avaialble. Skipping NDArray test build") + return() +endif(NOT GTest_FOUND) + +target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD}) + +add_test(ndarray_test ndarray_test) +install(TARGETS ndarray_test DESTINATION unittest_standalone) diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp new file mode 100644 index 000000000..4b5ad5765 --- /dev/null +++ b/runtime/libs/ndarray/test/ndarray_test.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "gtest/gtest.h" + +#include "ndarray/Array.h" + +using namespace ndarray; + +TEST(NDArray_tests, basic_data_test) +{ + + float raw_data[] = {1, 2, 3, 4}; + + Array<float> data22{raw_data, {2, 2}}; + + ASSERT_FLOAT_EQ(data22.at(0, 0), 1); + ASSERT_FLOAT_EQ(data22.at(0, 1), 2); + ASSERT_FLOAT_EQ(data22.at(1, 0), 3); + ASSERT_FLOAT_EQ(data22.at(1, 1), 4); + ASSERT_EQ(data22.shape().rank(), 2); + ASSERT_EQ(data22.shape().dim(0), 2); + ASSERT_EQ(data22.shape().dim(1), 2); + + Array<float> data14{raw_data, {1, 4}}; + ASSERT_FLOAT_EQ(data14.at(0, 0), 1); + ASSERT_FLOAT_EQ(data14.at(0, 1), 2); + ASSERT_FLOAT_EQ(data14.at(0, 2), 3); + ASSERT_FLOAT_EQ(data14.at(0, 3), 4); + ASSERT_EQ(data14.shape().rank(), 2); + ASSERT_EQ(data14.shape().dim(0), 1); + ASSERT_EQ(data14.shape().dim(1), 4); + + ContiguousSpan<float> cs = data22.flat(); + ASSERT_EQ(cs.size(), 4); + ASSERT_FLOAT_EQ(cs.at(3), 4); + + Array<float> lv = std::move(data14); + ASSERT_FLOAT_EQ(lv.at(0, 0), 1); + ASSERT_FLOAT_EQ(lv.at(0, 1), 2); + ASSERT_FLOAT_EQ(lv.at(0, 2), 3); + ASSERT_FLOAT_EQ(lv.at(0, 3), 4); +} + +TEST(NDArray_tests, slice_write_test) +{ + float raw_data[4] = {0}; + + Array<float> data22{raw_data, {2, 2}}; + + data22.slice(1) = {1, 2}; + + ASSERT_FLOAT_EQ(data22.at(0, 0), 0); + ASSERT_FLOAT_EQ(data22.at(0, 1), 0); + ASSERT_FLOAT_EQ(data22.at(1, 0), 1); + ASSERT_FLOAT_EQ(data22.at(1, 1), 2); +} + +TEST(NDArray_tests, slice_read_test) +{ + float raw_data[4] = {1, 2, 3, 4}; + + Array<float> data22{raw_data, {2, 2}}; + + auto slice = data22.slice(1); + + ASSERT_FLOAT_EQ(slice[0], 3); + ASSERT_FLOAT_EQ(slice[1], 4); +} + +TEST(NDArray_tests, multidim_test) +{ + float raw_data[5] = {0, 1, 2, 3, 4}; + + Array<float> data22{raw_data, {1, 1, 1, 1, 5}}; + + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3); + ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4); 
+} + +TEST(NDArray_tests, slice_assign_test) +{ + std::vector<float> v1{1, 2, 3, 4, 5}; + std::vector<float> v2(5); + + ContiguousSpan<float> span1(v1.begin(), v1.end()); + ContiguousSpan<float> span2(v2.begin(), v2.end()); + + span2.assign(span1); + + ASSERT_EQ(v1, v2); + ASSERT_EQ(span1.size(), 5); + ASSERT_EQ(span2.size(), 5); + + ASSERT_EQ(span2.at(2), 3); + ASSERT_EQ(span2.at(4), 5); + + ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2)); + + ContiguousSpan<float> span3(span2.offset(1)); + ASSERT_EQ(span3.size(), 4); + ASSERT_EQ(span3.at(0), 2); + ASSERT_EQ(span3.at(1), 3); + ASSERT_EQ(span3.at(2), 4); + ASSERT_EQ(span3.at(3), 5); +} diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 6624ae676..4fce291a0 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01001100 +#define NNFW_VERSION 0x01001200 #endif // __NNFW_VERSION_H__ diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt index 1e5443263..b61e58251 100644 --- a/runtime/onert/backend/cpu/CMakeLists.txt +++ b/runtime/onert/backend/cpu/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage) target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy) target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation) +target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ndarray) set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu) diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc index 59fb68d55..75274dc88 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.cc +++ 
b/runtime/onert/backend/cpu/KernelGenerator.cc @@ -35,6 +35,7 @@ #include "ops/GatherLayer.h" #include "ops/LSTMLayer.h" #include "ops/MeanLayer.h" +#include "ops/DetectionPostProcessLayer.h" #include "ops/OneHotLayer.h" #include "ops/OperationUtils.h" #include "ops/PackLayer.h" @@ -1177,6 +1178,51 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node) _return_fn = std::move(fn); } +void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node) +{ + using NMS = ir::operation::DetectionPostProcess; + + ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters; + parameters.scales.y = node.param().scale.y_scale; + parameters.scales.x = node.param().scale.x_scale; + parameters.scales.w = node.param().scale.w_scale; + parameters.scales.h = node.param().scale.h_scale; + + parameters.iou_threshold = node.param().iou_threshold; + parameters.score_threshold = node.param().score_threshold; + parameters.max_boxes_per_class = node.param().max_boxes_per_class; + parameters.max_detections = node.param().max_detections; + parameters.num_classes = node.param().num_classes; + parameters.center_box_format = node.param().center_size_boxes; + parameters.max_classes_per_detection = node.param().max_classes_per_detection; + + auto boxes_index = node.getInputs().at(NMS::Input::BOXES); + auto scores_index = node.getInputs().at(NMS::Input::SCORES); + auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS); + + auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES); + auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS); + auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES); + auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED); + + parameters.boxes_descr = _ctx.at(boxes_index).shape().dims(); + parameters.scrores_descr = _ctx.at(scores_index).shape().dims(); + + parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index); + parameters.scores_input = 
_tensor_reg->getPortableTensor(scores_index); + parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index); + + parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index); + parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index); + parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index); + parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index); + + auto fn = std::make_unique<ops::DetectionPostProcessLayer>(); + fn->configure(std::move(parameters)); + + _return_fn = std::move(fn); +} + void KernelGenerator::visit(const ir::operation::BatchMatMul &node) { const auto output_index{node.getOutputs().at(0)}; diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index d452d0ba6..d7d5fe6fc 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -69,6 +69,7 @@ public: void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::MatrixBandPart &) override; + void visit(const ir::operation::DetectionPostProcess &) override; void visit(const ir::operation::OneHot &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Pad &) override; diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc new file mode 100644 index 000000000..8a6fe6504 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DetectionPostProcessLayer.h" + +#include "ndarray/Array.h" + +#include <numeric> +#include <utility> +#include <cmath> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ + +using namespace ndarray; + +using CenterSizeBox = DetectionPostProcessLayer::CenterSizeBox; +using CornerBox = DetectionPostProcessLayer::CornerBox; + +using NonMaxSuppressionParam = DetectionPostProcessLayer::DetectionPostProcessParameters; +using Allocations = DetectionPostProcessLayer::Allocations; + +struct OutputArrays +{ + OutputArrays(CornerBox *coords_buf, float *scores_buf, float *classes_buf, + int *num_selections_buf, size_t max_detections) + : coords(coords_buf, {max_detections}), scores(scores_buf, {max_detections}), + classes(classes_buf, {max_detections}), num_selections(num_selections_buf, {1}) + { + } + + Array<CornerBox> coords; + Array<float> scores; + Array<float> classes; + Array<int> num_selections; +}; + +struct TemporaryArrays +{ + TemporaryArrays(int *selections_buffer, int max_detections) + : selections(selections_buffer, {static_cast<unsigned long>(max_detections)}) + { + } + + Array<int> selections; +}; + +// sort indices in decreasing order of first `k` scores +void PartialArgSort(const ContiguousSpan<float, true> &scores, + const ContiguousSpan<int, false> &indices, int k) +{ + std::iota(indices.begin(), indices.begin() + k, 0); + std::partial_sort(indices.begin(), indices.begin() + k, indices.begin() + scores.size(), + [&scores](const int i, const int j) { return scores[i] > scores[j]; 
}); +} + +template <typename T> ContiguousSpan<T, false> static vecToSpan(std::vector<T> &v) +{ + return ContiguousSpan<T, false>{v.begin(), v.end()}; +} + +Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<float> &raw_anchors, + bool center_box_format, const CenterSizeBox &scales) +{ + auto nbatches = raw_boxes.shape().dim(0); + auto num_boxes = raw_boxes.shape().dim(1); + + auto anchors = array_cast<const CenterSizeBox>(raw_anchors, {num_boxes}); + + if (!center_box_format) + { + auto boxes_p = reinterpret_cast<const CornerBox *>(raw_boxes.flat().data()); + return {boxes_p, {num_boxes}}; + } + else + { + // TODO support box center-width encoding correctly + // i.e anchors + auto boxes_p = reinterpret_cast<const CenterSizeBox *>(raw_boxes.flat().data()); + Array<const CenterSizeBox> in_boxes{boxes_p, {num_boxes}}; + + auto decoded_boxes_p = new CornerBox[nbatches * num_boxes]; + Array<CornerBox> decoded_boxes_a{decoded_boxes_p, {num_boxes}}; + + for (size_t i = 0; i < num_boxes; ++i) + { + auto anchor = anchors.at(i); + auto &box = decoded_boxes_a.at(i); + float yc = in_boxes.at(i).y / scales.y * anchor.h + anchor.y; + float xc = in_boxes.at(i).x / scales.x * anchor.w + anchor.x; + float halfh = 0.5f * std::exp(in_boxes.at(i).h / scales.h) * anchor.h; + float halfw = 0.5f * std::exp(in_boxes.at(i).w / scales.w) * anchor.w; + box.x1 = xc - halfw; + box.x2 = xc + halfw; + box.y1 = yc - halfh; + box.y2 = yc + halfh; + + assert(box.x2 > box.x1); + assert(box.y2 > box.y1); + } + + return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape()); + } +} + +float computeIOU(const CornerBox &box1, const CornerBox &box2) +{ + float area_i = (box1.y2 - box1.y1) * (box1.x2 - box1.x1); + float area_j = (box2.y2 - box2.y1) * (box2.x2 - box2.x1); + if (area_i <= 0 || area_j <= 0) + { + return 0.0; + } + float in_ymin = std::max<float>(box1.y1, box2.y1); + float in_xmin = std::max<float>(box1.x1, box2.x1); + float in_ymax = 
std::min<float>(box1.y2, box2.y2); + float in_xmax = std::min<float>(box1.x2, box2.x2); + float in_area = std::max<float>(in_ymax - in_ymin, 0.0) * std::max<float>(in_xmax - in_xmin, 0.0); + + return in_area / (area_i + area_j - in_area); +} + +int doSingleClass(const Array<const CornerBox> &boxes, const std::vector<float> &scores, + const NonMaxSuppressionParam &param, TemporaryArrays &temps, + size_t max_detections) +{ + auto num_boxes = boxes.shape().dim(0); + + std::vector<int> sorted_box_indices(num_boxes); + PartialArgSort(ContiguousSpan<float, true>(scores.data(), num_boxes), + vecToSpan(sorted_box_indices), num_boxes); + + // TODO move to temp allocations + std::vector<int> process_box(num_boxes, 1); + + size_t selected_count = 0; + for (size_t i = 0; i < num_boxes; ++i) + { + auto box_index = sorted_box_indices[i]; + + if (!process_box[box_index] || scores[box_index] < param.score_threshold) + { + continue; + } + + temps.selections.at(selected_count) = box_index; + selected_count++; + + if (selected_count >= max_detections) + { + break; + } + + for (size_t j = i + 1; j < num_boxes; ++j) + { + if (!process_box[sorted_box_indices[j]]) + { + continue; + } + + float IOU = computeIOU(boxes.at(box_index), boxes.at(sorted_box_indices[j])); + if (IOU > param.iou_threshold) + { + process_box[sorted_box_indices[j]] = 0; + } + } + } + + return selected_count; +} + +void collectBoxes(TemporaryArrays &temporary, const Array<const CornerBox> &decoded_boxes, + std::vector<float> &scores, int num_selected, OutputArrays &output, + const Array<int> &sorted_classes, int detections_per_box) +{ + auto &selections = temporary.selections; + + size_t output_box_count = 0; + + for (int i = 0; i < num_selected; ++i) + { + // `selections` holds one entry per selected box, so it is indexed by the box counter `i`; + // output_box_count advances detections_per_box times per box and is only the write cursor. + int selected_box = selections.at(i); + + for (int c = 0; c < detections_per_box; ++c) + { + output.classes.at(output_box_count) = sorted_classes.at(selected_box, c); + output.scores.at(output_box_count) = scores[selected_box]; + 
output.coords.at(output_box_count) = decoded_boxes.at(selected_box); + output_box_count++; + } + } +} + +void DetectionPostProcess(const Array<float> &boxes_a, const Array<float> &scores_a, + Array<float> &num_selected_a, const NonMaxSuppressionParam &param, + const Allocations &allocations, OutputArrays &outputs) +{ + TemporaryArrays temporary(allocations.selections_buffer, param.max_detections); + + // Only batch of 1 is supported atm + auto num_boxes = boxes_a.shape().dim(1); + size_t num_classes = param.num_classes; + size_t num_classes_with_background = scores_a.shape().dim(2); + bool have_background = num_classes_with_background != num_classes; + + size_t max_classes_per_box = std::min<size_t>(num_classes, param.max_classes_per_detection); + + // TODO move this to allocations + std::vector<int> sorted_class_indices(num_boxes * num_classes); + + Array<int> class_indices(sorted_class_indices.data(), {num_boxes, num_classes}); + + // TODO move to allocations + std::vector<float> max_scores(num_boxes); + + for (size_t row = 0; row < num_boxes; row++) + { + auto box_scores = scores_a.slice(0, row).offset(have_background ? 
1 : 0); + auto indices = class_indices.slice(row); + + PartialArgSort(box_scores, indices, num_classes); + + max_scores[row] = box_scores[indices[0]]; + } + + auto anchors_a = + Array<float>(reinterpret_cast<float *>(param.anchors_input->buffer()), {num_boxes, 4}); + auto decoded_boxes = decodeBoxes(boxes_a, anchors_a, param.center_box_format, param.scales); + + int num_selected = + doSingleClass(decoded_boxes, max_scores, param, temporary, param.max_detections); + + collectBoxes(temporary, decoded_boxes, max_scores, num_selected, outputs, class_indices, + max_classes_per_box); + + num_selected_a.at(0) = num_selected; +} +} // namespace + +template <typename T> Array<T> toArray(uint8_t *ptr, std::vector<int32_t> &descr) +{ + ndarray::Shape shape(descr.size()); + for (size_t i = 0; i < descr.size(); ++i) + { + shape.dim(i) = descr[i]; + } + + return Array<T>{reinterpret_cast<T *>(ptr), shape}; +} + +void DetectionPostProcessLayer::configure(DetectionPostProcessParameters parameters) +{ + _parameters = std::move(parameters); + _allocations.selections_buffer = new int[_parameters.max_detections * 2]; +} + +void DetectionPostProcessLayer::run() +{ + auto nbatches = (unsigned int)_parameters.boxes_descr[0]; + // no suport for batch other than 1( it's fine since tflite does not support + // batch for postprocess either ) + assert(nbatches == 1); + + auto boxes_a = toArray<float>(_parameters.boxes_input->buffer(), _parameters.boxes_descr); + auto scores_a = toArray<float>(_parameters.scores_input->buffer(), _parameters.scrores_descr); + + auto num_selected_a = ndarray::Array<float>( + reinterpret_cast<float *>(_parameters.num_selections_output->buffer()), {nbatches}); + + OutputArrays outputArrays(reinterpret_cast<CornerBox *>(_parameters.box_coords_output->buffer()), + reinterpret_cast<float *>(_parameters.box_scores_output->buffer()), + reinterpret_cast<float *>(_parameters.box_classes_output->buffer()), + reinterpret_cast<int 
*>(_parameters.num_selections_output->buffer()), + _parameters.max_detections); + + DetectionPostProcess(boxes_a, scores_a, num_selected_a, _parameters, _allocations, outputArrays); +} + +DetectionPostProcessLayer::~DetectionPostProcessLayer() { delete[] _allocations.selections_buffer; } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h new file mode 100644 index 000000000..836a70cac --- /dev/null +++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_BACKEND_CPU_OPS_DPP_H__ +#define __ONERT_BACKEND_CPU_OPS_DPP_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +class DetectionPostProcessLayer : public ::onert::exec::IFunction +{ +public: + struct CornerBox + { + float y1, x1; + float y2, x2; + }; + + struct CenterSizeBox + { + float y, x; + float h, w; + }; + + struct DetectionPostProcessParameters + { + const IPortableTensor *boxes_input; + const IPortableTensor *scores_input; + const IPortableTensor *anchors_input; + IPortableTensor *box_coords_output; + IPortableTensor *box_classes_output; + IPortableTensor *box_scores_output; + IPortableTensor *num_selections_output; + std::vector<int32_t> boxes_descr; + std::vector<int32_t> scrores_descr; + + uint32_t max_detections; + float score_threshold; + float iou_threshold; // intersection-over-union + uint32_t max_boxes_per_class; + bool center_box_format = false; + int32_t num_classes; + int32_t max_classes_per_detection; + CenterSizeBox scales; + }; + + enum SelectionFormat + { + BOX_INDEX = 1, + CLASS_INDEX = 0 + }; + + struct Allocations + { + int *selections_buffer = nullptr; + // TODO move all dynamic allocations here, and into configure phase + }; + + DetectionPostProcessLayer() : _parameters{} + { + // DO NOTHING + } + + virtual ~DetectionPostProcessLayer(); + +public: + void configure(DetectionPostProcessParameters parameters); + + void run() override; + +private: + DetectionPostProcessParameters _parameters; + + Allocations _allocations; +}; + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CPU_OPS_DPP_H__ diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h index 2e484e649..b2272e262 100644 --- a/runtime/onert/core/include/compiler/StaticShapeInferer.h +++ 
b/runtime/onert/core/include/compiler/StaticShapeInferer.h @@ -112,6 +112,7 @@ private: void visit(const ir::operation::Transpose &op) override; void visit(const ir::operation::Unpack &op) override; void visit(const ir::operation::While &op) override; + void visit(const ir::operation::DetectionPostProcess &op) override; private: /** diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h index 3d040e2cc..f814b789a 100644 --- a/runtime/onert/core/include/exec/DynamicShapeInferer.h +++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h @@ -67,6 +67,7 @@ public: void visit(const ir::operation::L2Normalization &op) override; void visit(const ir::operation::LSTM &op) override; void visit(const ir::operation::MatrixBandPart &op) override; + void visit(const ir::operation::DetectionPostProcess &op) override; void visit(const ir::operation::OneHot &op) override; void visit(const ir::operation::Pack &op) override; void visit(const ir::operation::Pad &op) override; diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 45fadc474..0eb45e1ee 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -50,6 +50,7 @@ #include "ir/operation/LogSoftmax.h" #include "ir/operation/LSTM.h" #include "ir/operation/MatrixBandPart.h" +#include "ir/operation/DetectionPostProcess.h" #include "ir/operation/OneHot.h" #include "ir/operation/Pack.h" #include "ir/operation/Pad.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index 7f3c40b4b..f17fdfdd7 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -53,6 +53,7 @@ OP(LocalResponseNormalization) OP(LogSoftmax) OP(LSTM) OP(MatrixBandPart) +OP(DetectionPostProcess) OP(OneHot) OP(Pack) OP(Pad) diff --git 
a/runtime/onert/core/include/ir/operation/DetectionPostProcess.h b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h new file mode 100644 index 000000000..becb0e21a --- /dev/null +++ b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__ +#define __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__ + +#include "ir/Operation.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +class DetectionPostProcess : public Operation +{ +public: + enum Input + { + BOXES = 0, + SCORES = 1, + INPUT_ANCHORS = 2 + }; + + enum Output + { + BOX_COORDS = 0, + BOX_CLASSES = 1, + BOX_SCORES = 2, + NUM_SELECTED = 3 + }; + + struct Scale + { + float y_scale; + float x_scale; + float h_scale; + float w_scale; + }; + + struct Param + { + int max_detections; + float score_threshold; + float iou_threshold; // intersection-over-union + int max_boxes_per_class; + int32_t num_classes; + int32_t max_classes_per_detection; + // N*N complexity instead of N*N*M, where N - number of boxes and M number of classes + bool center_size_boxes; + bool do_fast_eval = true; + Scale scale; + }; + +public: + DetectionPostProcess(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); + +public: + void 
accept(OperationVisitor &v) const override; + + std::string getName() const { return "DetectionPostProcess"; } + +public: + const Param &param() const { return _param; } + OpCode opcode() const final { return OpCode::DetectionPostProcess; } + +private: + Param _param; +}; + +} // namespace operation +} // namespace ir +} // namespace onert + +#endif // __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__ diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc index 5849a9801..f2fee2c3c 100644 --- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc +++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc @@ -1302,6 +1302,30 @@ void StaticShapeInferer::visit(const ir::operation::While &op) } } +void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op) +{ + // TODO: NMS supports very limited input/output size. + ir::operation::DetectionPostProcess::Param param = op.param(); + + const int num_detected_boxes = param.max_detections * param.max_classes_per_detection; + + const auto output_idx1 = op.getOutputs().at(0); + auto &output1 = _operands.at(output_idx1); + output1.info().shape({1, num_detected_boxes, 4}); + + const auto output_idx2 = op.getOutputs().at(1); + auto &output2 = _operands.at(output_idx2); + output2.info().shape({1, num_detected_boxes}); + + const auto output_idx3 = op.getOutputs().at(2); + auto &output3 = _operands.at(output_idx3); + output3.info().shape({1, num_detected_boxes}); + + const auto output_idx4 = op.getOutputs().at(3); + auto &output4 = _operands.at(output_idx4); + output4.info().shape({1}); +} + } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc index dbf4eb28f..fb8058d23 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc +++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc @@ -601,6 +601,14 @@ void 
DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT)); } +void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */) +{ + // NOTE DetectionPostProcess's undefined outputs' shape are decided on compile time + // by static shape inferer. + // DetectionPostProcess's outputs' shape are independent with input shape + // and decided by parameter value. +} + void DynamicShapeInferer::visit(const ir::operation::OneHot &op) { auto output_ind = op.getOutputs().at(0); diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc index 705a37e2c..094dbc0d5 100644 --- a/runtime/onert/core/src/ir/OperationValidator.cc +++ b/runtime/onert/core/src/ir/OperationValidator.cc @@ -211,6 +211,14 @@ void OperationValidator::visit(const operation::DepthToSpace &node) OP_REQUIRES(block_size > 0); } +void OperationValidator::visit(const operation::DetectionPostProcess &node) +{ + auto param = node.param(); + + // FIXME: number of classes should be 1 for now. 
+ OP_REQUIRES(param.num_classes == 1); +} + void OperationValidator::visit(const operation::DepthwiseConv2D &node) { const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)}; diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h index 9829ca095..b9bcc4ee8 100644 --- a/runtime/onert/core/src/ir/OperationValidator.h +++ b/runtime/onert/core/src/ir/OperationValidator.h @@ -55,6 +55,7 @@ public: void visit(const operation::Conv2D &node) override; void visit(const operation::DepthToSpace &node) override; void visit(const operation::DepthwiseConv2D &node) override; + void visit(const operation::DetectionPostProcess &node) override; void visit(const operation::ElementwiseActivation &node) override; void visit(const operation::ElementwiseBinary &node) override; void visit(const operation::ElementwiseUnary &node) override; diff --git a/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc new file mode 100644 index 000000000..cd708796d --- /dev/null +++ b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ir/operation/DetectionPostProcess.h" +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +DetectionPostProcess::DetectionPostProcess(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param &param) + : Operation(OperandConstraint::createExact(3u), inputs, outputs), _param(param) +{ +} + +void DetectionPostProcess::accept(OperationVisitor &v) const { v.visit(*this); } + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index c444e7365..6ba7ee922 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -142,6 +142,7 @@ private: void loadIf(const Operator *op, ir::Graph &subg); void loadLeakyRelu(const Operator *op, ir::Graph &subg); void loadLogSoftmax(const Operator *op, ir::Graph &subg); + void loadDetectionPostProcess(const Operator *op, ir::Graph &subg); void loadOneHot(const Operator *op, ir::Graph &subg); void loadPack(const Operator *op, ir::Graph &subg); void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); @@ -928,6 +929,45 @@ void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg) } template <typename LoaderDomain> +void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg) +{ + const flexbuffers::Map &m = + flexbuffers::GetRoot(op->custom_options()->data(), op->custom_options()->size()).AsMap(); + + ir::operation::DetectionPostProcess::Param param; + + param.max_detections = m["max_detections"].AsInt32(); + + // TODO fixme + param.max_classes_per_detection = m["max_classes_per_detection"].AsInt32(); + if (m["detections_per_class"].IsNull()) + param.max_boxes_per_class = 100; + else + param.max_boxes_per_class = m["detections_per_class"].AsInt32(); + 
+ if (m["use_regular_nms"].IsNull()) + param.do_fast_eval = true; + else + param.do_fast_eval = !m["use_regular_nms"].AsBool(); + + param.score_threshold = m["nms_score_threshold"].AsFloat(); + param.iou_threshold = m["nms_iou_threshold"].AsFloat(); + + // TODO add num classes support + param.num_classes = m["num_classes"].AsInt32(); + + param.scale.y_scale = m["y_scale"].AsFloat(); + param.scale.x_scale = m["x_scale"].AsFloat(); + param.scale.h_scale = m["h_scale"].AsFloat(); + param.scale.w_scale = m["w_scale"].AsFloat(); + + // TODO depends on input model framework + param.center_size_boxes = true; + + loadOperationTo<ir::operation::DetectionPostProcess>(op, subg, param); +} + +template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &subg) { ir::operation::BatchMatMul::Param param; @@ -997,7 +1037,8 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) BroadcastTo, FusedBatchNorm, StatelessRandomUniform, - Erf + Erf, + DetectionPostProcess }; // Mapping from custom op name string to BuiltinOP enum @@ -1011,6 +1052,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) {"BroadcastTo", BuiltinOP::BroadcastTo}, {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, {"Erf", BuiltinOP::Erf}, + {"TFLite_Detection_PostProcess", BuiltinOP::DetectionPostProcess}, }; try @@ -1046,6 +1088,9 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg) case BuiltinOP::Erf: loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF); break; + case BuiltinOP::DetectionPostProcess: + loadDetectionPostProcess(op, subg); + break; default: throw std::runtime_error{ "Loader: Custom OP map is defined but operation loader function is not defined"}; diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc index 579d68c86..0ffc8fb44 100644 --- a/tests/nnfw_api/src/CircleGen.cc +++ 
b/tests/nnfw_api/src/CircleGen.cc @@ -15,6 +15,7 @@ */ #include "CircleGen.h" +#include "flatbuffers/flexbuffers.h" CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph { @@ -189,6 +190,35 @@ uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams &params, circle::BuiltinOptions_DepthwiseConv2DOptions, options); } +uint32_t CircleGen::addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes, + float y_scale, float x_scale, float h_scale, + float w_scale, float nms_score_threshold, + float nms_iou_threshold, int max_detections, + int max_classes_per_detection, + int detections_per_class) +{ + // flexbuffer custom_option + auto flex_buffers = std::make_unique<flexbuffers::Builder>(); + size_t map_start = flex_buffers->StartMap(); + flex_buffers->Int("num_classes", num_classes); + flex_buffers->Float("y_scale", y_scale); + flex_buffers->Float("x_scale", x_scale); + flex_buffers->Float("h_scale", h_scale); + flex_buffers->Float("w_scale", w_scale); + flex_buffers->Float("nms_iou_threshold", nms_iou_threshold); + flex_buffers->Float("nms_score_threshold", nms_score_threshold); + flex_buffers->Int("max_detections", max_detections); + flex_buffers->Int("max_classes_per_detection", max_classes_per_detection); + flex_buffers->Int("detections_per_class", detections_per_class); + flex_buffers->EndMap(map_start); + flex_buffers->Finish(); + + return addCustomOperatorWithOptions(params, "TFLite_Detection_PostProcess", + circle::BuiltinOptions_NONE, 0, &flex_buffers->GetBuffer(), + circle::CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS, + nullptr, nullptr); +} + uint32_t CircleGen::addOperatorElu(const OperatorParams &params) { return addOperatorWithOptions(params, circle::BuiltinOperator_ELU, circle::BuiltinOptions_NONE, @@ -523,6 +553,23 @@ uint32_t CircleGen::addOperatorWithOptions(const OperatorParams &params, return ind; } +uint32_t CircleGen::addCustomOperatorWithOptions( + const OperatorParams &params, std::string custom_code, 
circle::BuiltinOptions options_type, + flatbuffers::Offset<void> options, const std::vector<uint8_t> *custom_options, + circle::CustomOptionsFormat custom_options_format, + const std::vector<uint8_t> *mutating_variable_inputs, const std::vector<int32_t> *intermediates) + +{ + uint32_t opcode_ind = addCustomOperatorCode(custom_code); + auto op = circle::CreateOperatorDirect( + _fbb, opcode_ind, &params.inputs, &params.outputs, options_type, options, custom_options, + custom_options_format, mutating_variable_inputs, intermediates); + + uint32_t ind = curSubgCtx().operators.size(); + curSubgCtx().operators.emplace_back(op); + return ind; +} + uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode) { // TODO If the same OperatorCode is registered already, just return it @@ -531,6 +578,15 @@ uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode) return ind; } +uint32_t CircleGen::addCustomOperatorCode(std::string custom_code) +{ + // TODO If the same OperatorCode is registered already, just return it + uint32_t ind = _opcodes.size(); + _opcodes.emplace_back( + circle::CreateOperatorCodeDirect(_fbb, circle::BuiltinOperator_CUSTOM, custom_code.c_str())); + return ind; +} + flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size) { if (buf == nullptr && size == 0) diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h index ab7707d5a..f6f799668 100644 --- a/tests/nnfw_api/src/CircleGen.h +++ b/tests/nnfw_api/src/CircleGen.h @@ -159,6 +159,11 @@ public: int stride_w, int stride_h, int depth_multiplier, circle::ActivationFunctionType actfn, int dilation_w = 1, int dilation_h = 1); + uint32_t addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes, + float y_scale, float x_scale, float h_scale, + float w_scale, float nms_score_threshold, + float nms_iou_threshold, int max_detections, + int max_classes_per_detection, int detections_per_class); uint32_t addOperatorElu(const 
OperatorParams &params); uint32_t addOperatorEqual(const OperatorParams &params); uint32_t addOperatorExpandDims(const OperatorParams &params); @@ -220,7 +225,15 @@ private: uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode, circle::BuiltinOptions options_type, flatbuffers::Offset<void> options); + uint32_t addCustomOperatorWithOptions(const OperatorParams &params, std::string custom_code, + circle::BuiltinOptions options_type, + flatbuffers::Offset<void> options, + const std::vector<uint8_t> *custom_options, + circle::CustomOptionsFormat custom_options_format, + const std::vector<uint8_t> *mutating_variable_inputs, + const std::vector<int32_t> *intermediates); uint32_t addOperatorCode(circle::BuiltinOperator opcode); + uint32_t addCustomOperatorCode(std::string custom_code); flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size); flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params); flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params, float scale, diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc index 3df7e7403..dda098698 100644 --- a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc +++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc @@ -36,28 +36,6 @@ class ArgMinMaxVariation : public GenModelTest, // Reduce axis: 1 // Output shape: {1, 2, 1} // Output type: Int32 -TEST_P(ArgMinMaxVariation, Test) -{ - auto &param = GetParam(); - - CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_INT32; - std::vector<int32_t> axis_data{1}; - uint32_t axis_buf = cgen.addBuffer(axis_data); - int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); - int out = cgen.addTensor({{1, 2, 1}, output_type}); - param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) - : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique<GenModelTestContext>(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - // Test with different input type and value INSTANTIATE_TEST_CASE_P( GenModelTest, ArgMinMaxVariation, @@ -93,6 +71,28 @@ INSTANTIATE_TEST_CASE_P( TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false, circle::TensorType::TensorType_INT8, 1.0, 1})); +TEST_P(ArgMinMaxVariation, Test) +{ + auto &param = GetParam(); + + CircleGen cgen; + const auto output_type = circle::TensorType::TensorType_INT32; + std::vector<int32_t> axis_data{1}; + uint32_t axis_buf = cgen.addBuffer(axis_data); + int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 1}, output_type}); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); + cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique<GenModelTestContext>(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst) { CircleGen cgen; @@ -132,35 +132,41 @@ TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0) +TEST_P(ArgMinMaxVariation, neg_InvalidAxis0) { + auto &param = GetParam(); + CircleGen cgen; const auto output_type = circle::TensorType::TensorType_INT32; std::vector<int32_t> axis_data{4}; uint32_t axis_buf = cgen.addBuffer(axis_data); int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique<GenModelTestContext>(cgen.finish()); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); _context->expectFailCompile(); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1) +TEST_P(ArgMinMaxVariation, neg_InvalidAxis1) { + auto &param = GetParam(); + CircleGen cgen; const auto output_type = circle::TensorType::TensorType_INT32; std::vector<int32_t> axis_data{-3}; uint32_t axis_buf = cgen.addBuffer(axis_data); int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point); int out = cgen.addTensor({{2}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique<GenModelTestContext>(cgen.finish()); @@ -188,16 +194,19 @@ TEST_F(GenModelTest, neg_OneOp_ArgMax_InType) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_ArgMax_AxisType) +TEST_P(ArgMinMaxVariation, neg_AxisType) { + auto &param = GetParam(); + CircleGen cgen; - const auto output_type = circle::TensorType::TensorType_FLOAT32; + const auto output_type = circle::TensorType::TensorType_INT32; std::vector<float> axis_data{4}; uint32_t axis_buf = cgen.addBuffer(axis_data); int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, output_type); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_type) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type); cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique<GenModelTestContext>(cgen.finish()); @@ -224,16 +233,20 @@ TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_ArgMax_paramType) +TEST_P(ArgMinMaxVariation, neg_paramType) { + auto &param = GetParam(); + CircleGen cgen; const auto output_type = circle::TensorType::TensorType_INT32; + const auto output_param = circle::TensorType::TensorType_INT64; std::vector<int32_t> axis_data{4}; uint32_t axis_buf = cgen.addBuffer(axis_data); int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf}); - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point); int out = cgen.addTensor({{1, 2, 1}, output_type}); - cgen.addOperatorArgMax({{in, axis}, {out}}, circle::TensorType::TensorType_INT64); + param.is_argmax ? 
cgen.addOperatorArgMax({{in, axis}, {out}}, output_param) + : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param); cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique<GenModelTestContext>(cgen.finish()); diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc index 2fb1d6898..15ddac210 100644 --- a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc +++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc @@ -42,27 +42,6 @@ class AveragePool2DVariation : public GenModelTest, { }; -TEST_P(AveragePool2DVariation, Test) -{ - auto &param = GetParam(); - CircleGen cgen; - - int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, - param.type.zero_point); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, - param.param.stride_h, param.param.filter_w, param.param.filter_h, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - - _context = std::make_unique<GenModelTestContext>(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends(param.backend); - - SUCCEED(); -} - // Test with different input type and value INSTANTIATE_TEST_CASE_P( GenModelTest, AveragePool2DVariation, @@ -108,6 +87,27 @@ INSTANTIATE_TEST_CASE_P( {circle::TensorType::TensorType_INT8, 2.0, -1}, {"cpu"}})); +TEST_P(AveragePool2DVariation, Test) +{ + auto &param = GetParam(); + CircleGen cgen; + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, + param.param.stride_h, param.param.filter_w, param.param.filter_h, + circle::ActivationFunctionType_NONE); 
+ cgen.setInputsAndOutputs({in}, {out}); + + _context = std::make_unique<GenModelTestContext>(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends(param.backend); + + SUCCEED(); +} + TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput) { // 3D Tensors are not supported @@ -142,13 +142,18 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidPaddingType) +TEST_P(AveragePool2DVariation, neg_InvalidPaddingType) { + auto &param = GetParam(); CircleGen cgen; - int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99), 2, 2, 2, 2, - circle::ActivationFunctionType_NONE); + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99), + param.param.stride_w, param.param.stride_h, param.param.filter_w, + param.param.filter_h, circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique<GenModelTestContext>(cgen.finish()); @@ -157,12 +162,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidPaddingType) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_1) +TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1) { + auto &param = GetParam(); CircleGen cgen; - int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, -1, 2, + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out 
= cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, + param.param.stride_h, -1, param.param.filter_h, circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({in}, {out}); @@ -172,12 +182,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_1) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_2) +TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2) { + auto &param = GetParam(); CircleGen cgen; - int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 0, + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, + param.param.stride_h, param.param.filter_w, 0, circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({in}, {out}); @@ -187,12 +202,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_2) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_1) +TEST_P(AveragePool2DVariation, neg_InvalidStrides_1) { + auto &param = GetParam(); CircleGen cgen; - int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, 2, 2, 2, + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + 
cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h, + param.param.filter_w, param.param.filter_h, circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({in}, {out}); @@ -202,12 +222,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_1) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_2) +TEST_P(AveragePool2DVariation, neg_InvalidStrides_2) { + auto &param = GetParam(); CircleGen cgen; - int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 1, -100, 2, 2, + + int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale, + param.type.zero_point); + cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100, + param.param.filter_w, param.param.filter_h, circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({in}, {out}); diff --git a/tests/nnfw_api/src/one_op_tests/Concat.cc b/tests/nnfw_api/src/one_op_tests/Concat.cc index 6e2435965..f4397ba66 100644 --- a/tests/nnfw_api/src/one_op_tests/Concat.cc +++ b/tests/nnfw_api/src/one_op_tests/Concat.cc @@ -59,25 +59,6 @@ class ConcatVariation : public GenModelTest, // Input shape: {2, 3} / {2, 3} // Output shape: {4, 3} -TEST_P(ConcatVariation, Test) -{ - auto &param = GetParam(); - - CircleGen cgen; - int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); - int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); - cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0, - circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({input1, input2}, {output}); 
- - _context = std::make_unique<GenModelTestContext>(cgen.finish()); - _context->addTestCase(param.tcd); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - INSTANTIATE_TEST_CASE_P( GenModelTest, ConcatVariation, ::testing::Values( @@ -107,6 +88,25 @@ INSTANTIATE_TEST_CASE_P( {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}), circle::TensorType::TensorType_INT64})); +TEST_P(ConcatVariation, Test) +{ + auto &param = GetParam(); + + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); + cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0, + circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({input1, input2}, {output}); + + _context = std::make_unique<GenModelTestContext>(cgen.finish()); + _context->addTestCase(param.tcd); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D) { CircleGen cgen; @@ -180,13 +180,14 @@ TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Concat_InvalidAxis) +TEST_P(ConcatVariation, neg_InvalidAxis) { - CircleGen cgen; + auto &param = GetParam(); - int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - int input2 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32}); + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); int axis = 2; cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, @@ -200,13 +201,14 @@ TEST_F(GenModelTest, 
neg_OneOp_Concat_InvalidAxis) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Concat_InvalidRank) +TEST_P(ConcatVariation, neg_InvalidRank) { - CircleGen cgen; + auto &param = GetParam(); - int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - int input2 = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32}); - int output = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32}); + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point); int axis = 0; cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, @@ -220,13 +222,14 @@ TEST_F(GenModelTest, neg_OneOp_Concat_InvalidRank) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Concat_InvalidDimension) +TEST_P(ConcatVariation, neg_InvalidDimension) { - CircleGen cgen; + auto &param = GetParam(); - int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32}); - int input2 = cgen.addTensor({{3, 2}, circle::TensorType::TensorType_FLOAT32}); - int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32}); + CircleGen cgen; + int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point); + int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point); + int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point); int axis = 0; cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis, diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc index 9f563401f..a4fe88493 100644 --- a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc +++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc @@ -29,6 +29,9 @@ class DepthToSpaceVariation : public GenModelTest, { }; +// Input shape: {1, 1, 2, 4} +// Block size: 2 +// Output 
shape: {1, 2, 4, 1} INSTANTIATE_TEST_CASE_P( GenModelTest, DepthToSpaceVariation, ::testing::Values( @@ -52,9 +55,6 @@ INSTANTIATE_TEST_CASE_P( uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}), circle::TensorType::TensorType_INT8, 1.0f, -2})); -// Input shape: {1, 1, 2, 4} -// Block size: 2 -// Output shape: {1, 2, 4, 1} TEST_P(DepthToSpaceVariation, Test) { auto &param = GetParam(); @@ -72,12 +72,13 @@ TEST_P(DepthToSpaceVariation, Test) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_DepthToSpace_Blocksize) +TEST_P(DepthToSpaceVariation, neg_Blocksize) { + auto &param = GetParam(); + CircleGen cgen; - circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32; - int in = cgen.addTensor({{1, 1, 2, 4}, data_type}); - int out = cgen.addTensor({{1, 2, 4, 1}, data_type}); + int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point); + int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point); cgen.addOperatorDepthToSpace({{in}, {out}}, -2); cgen.setInputsAndOutputs({in}, {out}); diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc index 658c44cb9..a0bdbf9e6 100644 --- a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc +++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc @@ -257,50 +257,6 @@ class DepthwiseConv2DQuantTest using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>; using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>; -CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier) -{ - assert(1 <= stride && stride <= 2); - assert(1 <= input_depth && input_depth <= 16); - assert(1 <= depth_multiplier && depth_multiplier <= 32); - - const int output_depth = input_depth * depth_multiplier; - assert(1 <= output_depth && output_depth <= 32); - - CircleGen cgen; - uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 
1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, - 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); - uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0)); - int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0); - int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); - int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); - int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0); - cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, - stride, depth_multiplier, circle::ActivationFunctionType_NONE); - cgen.setInputsAndOutputs({in}, {out}); - return cgen.finish(); -} - -TEST_P(DepthwiseConv2DQuantTestU8, Test) -{ - // Same input is used for all tests but output differs - static const std::vector<uint8_t> input64{ - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, - 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; - - auto ¶m = GetParam(); - _context = std::make_unique<GenModelTestContext>( - genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier)); - std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4); - _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output})); - _context->setBackends({"acl_cl", "acl_neon", "cpu"}); - - SUCCEED(); -} - // Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU // kernels. 
INSTANTIATE_TEST_CASE_P( @@ -337,10 +293,7 @@ INSTANTIATE_TEST_CASE_P( DepthwiseConv2DQuantTestParamU8{ 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); -using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>; -using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>; - -CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier) +CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier) { assert(1 <= stride && stride <= 2); assert(1 <= input_depth && input_depth <= 16); @@ -350,40 +303,43 @@ CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int dep assert(1 <= output_depth && output_depth <= 32); CircleGen cgen; - uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{ + uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0)); - int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0); - int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0); + int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0); + int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0); int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); - int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0); + int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0); 
cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, stride, depth_multiplier, circle::ActivationFunctionType_NONE); cgen.setInputsAndOutputs({in}, {out}); return cgen.finish(); } -TEST_P(DepthwiseConv2DQuantTestI8, Test) +TEST_P(DepthwiseConv2DQuantTestU8, Test) { // Same input is used for all tests but output differs - static const std::vector<int8_t> input64{ + static const std::vector<uint8_t> input64{ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; auto &param = GetParam(); _context = std::make_unique<GenModelTestContext>( - genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier)); - std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4); - _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output})); + genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier)); + std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4); + _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output})); _context->setBackends({"acl_cl", "acl_neon", "cpu"}); SUCCEED(); } +using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>; +using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>; + // Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU // kernels. 
INSTANTIATE_TEST_CASE_P( @@ -420,6 +376,50 @@ INSTANTIATE_TEST_CASE_P( DepthwiseConv2DQuantTestParamI8{ 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}})); +CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier) +{ + assert(1 <= stride && stride <= 2); + assert(1 <= input_depth && input_depth <= 16); + assert(1 <= depth_multiplier && depth_multiplier <= 32); + + const int output_depth = input_depth * depth_multiplier; + assert(1 <= output_depth && output_depth <= 32); + + CircleGen cgen; + uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, + 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); + uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0)); + int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0); + int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0); + int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0); + int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0); + cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride, + stride, depth_multiplier, circle::ActivationFunctionType_NONE); + cgen.setInputsAndOutputs({in}, {out}); + return cgen.finish(); +} + +TEST_P(DepthwiseConv2DQuantTestI8, Test) +{ + // Same input is used for all tests but output differs + static const std::vector<int8_t> input64{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, + 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 
8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2}; + + auto &param = GetParam(); + _context = std::make_unique<GenModelTestContext>( + genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier)); + std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4); + _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output})); + _context->setBackends({"acl_cl", "acl_neon", "cpu"}); + + SUCCEED(); +} + TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType) { _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel( diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc new file mode 100644 index 000000000..188638bbb --- /dev/null +++ b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "GenModelTest.h" + +#include <memory> + +TEST_F(GenModelTest, OneOp_DetectionPostProcess_SingleBox) +{ + CircleGen cgen; + + int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + int scores = cgen.addTensor({{1, 1, 2}, circle::TensorType::TensorType_FLOAT32}); + int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + + int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + + cgen.addOperatorDetectionPostProcess( + {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 1, 10, 10, 5, 5, + 0.8, 0.5, 1, 1, 1); + cgen.setInputsAndOutputs({boxes, scores, anchors}, + {box_coors, box_classes, box_scores, num_selected}); + + _context = std::make_unique<GenModelTestContext>(cgen.finish()); + _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.9}, {0, 0, 1, 1}}, + {{-0.5, -0.5, 0.5, 0.5}, {0}, {0.9}, {1}})); + _context->setBackends({"cpu"}); + + SUCCEED(); +} + +TEST_F(GenModelTest, neg_OneOp_DetectionPostProcess_SinglBox_MultiClasses) +{ + CircleGen cgen; + + int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + int scores = cgen.addTensor({{1, 1, 3}, circle::TensorType::TensorType_FLOAT32}); + int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + + int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32}); + int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32}); + + cgen.addOperatorDetectionPostProcess( + {{boxes, 
scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 2, 10, 10, 5, 5, + 0.8, 0.5, 1, 1, 1); + cgen.setInputsAndOutputs({boxes, scores, anchors}, + {box_coors, box_classes, box_scores, num_selected}); + + _context = std::make_unique<GenModelTestContext>(cgen.finish()); + _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.7, 0.9}, {0, 0, 1, 1}}, + {{-0.5, -0.5, 0.5, 0.5}, {1}, {0.9}, {1}})); + _context->setBackends({"cpu"}); + _context->expectFailModelLoad(); + + SUCCEED(); +} diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc index 42971da79..c376c1c02 100644 --- a/tests/nnfw_api/src/one_op_tests/Pad.cc +++ b/tests/nnfw_api/src/one_op_tests/Pad.cc @@ -31,6 +31,21 @@ class PadVariation : public GenModelTest, public ::testing::WithParamInterface<P { }; +// Test with different value type +INSTANTIATE_TEST_CASE_P( + GenModelTest, PadVariation, + ::testing::Values( + // float value + PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})}, + // uint8 value + PadParam{ + uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}), + circle::TensorType::TensorType_UINT8, 1.0, 8}, + // int8 value + PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}}, + {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}), + circle::TensorType::TensorType_INT8, 1.0, -5})); + TEST_P(PadVariation, Test) { auto &param = GetParam(); @@ -51,29 +66,16 @@ TEST_P(PadVariation, Test) SUCCEED(); } -// Test with different value type -INSTANTIATE_TEST_CASE_P( - GenModelTest, PadVariation, - ::testing::Values( - // float value - PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})}, - // uint8 value - PadParam{ - uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}), - circle::TensorType::TensorType_UINT8, 1.0, 8}, - // int8 value - PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}}, - {{-5, -5, 
-5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}), - circle::TensorType::TensorType_INT8, 1.0, -5})); - -TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank) +TEST_P(PadVariation, neg_InvalidPadRank) { + auto &param = GetParam(); + CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); std::vector<int32_t> padding_data{1, 1, 1, 1}; uint32_t padding_buf = cgen.addBuffer(padding_data); int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); cgen.addOperatorPad({{in, padding}, {out}}); cgen.setInputsAndOutputs({in}, {out}); @@ -85,14 +87,16 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0) +TEST_P(PadVariation, neg_InvalidPadDim0) { + auto &param = GetParam(); + CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); std::vector<int32_t> padding_data{1, 1, 1, 1}; uint32_t padding_buf = cgen.addBuffer(padding_data); int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); cgen.addOperatorPad({{in, padding}, {out}}); cgen.setInputsAndOutputs({in}, {out}); @@ -104,14 +108,16 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1) +TEST_P(PadVariation, neg_InvalidPadDim1) { + auto &param = GetParam(); + CircleGen cgen; - int in = cgen.addTensor({{1, 1, 1, 1}, 
circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); std::vector<int32_t> padding_data{1, 1, 1, 1}; uint32_t padding_buf = cgen.addBuffer(padding_data); int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}); + int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point); cgen.addOperatorPad({{in, padding}, {out}}); cgen.setInputsAndOutputs({in}, {out}); @@ -123,14 +129,20 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Pad_Type) +TEST_P(PadVariation, neg_Type) { + auto &param = GetParam(); + + const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8) + ? circle::TensorType::TensorType_INT8 + : circle::TensorType::TensorType_UINT8); + CircleGen cgen; - int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32}); + int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point); std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0}; uint32_t padding_buf = cgen.addBuffer(padding_data); int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf}); - int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1); + int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0); cgen.addOperatorPad({{in, padding}, {out}}); cgen.setInputsAndOutputs({in}, {out}); diff --git a/tests/nnfw_api/src/one_op_tests/Slice.cc b/tests/nnfw_api/src/one_op_tests/Slice.cc index 960cd88e3..002fb0132 100644 --- a/tests/nnfw_api/src/one_op_tests/Slice.cc +++ b/tests/nnfw_api/src/one_op_tests/Slice.cc @@ -34,6 +34,32 @@ class SliceVariation : public GenModelTest, { }; +INSTANTIATE_TEST_CASE_P( + GenModelTest, SliceVariation, + ::testing::Values( + SliceVariationParam{ + {2, 2, 3, 1}, 
+ {0, 1, 1, 0}, + {1, 1, 2, 1}, + uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})}, + SliceVariationParam{ + {2, 2, 3, 1}, + {0, 1, 1, 0}, + {1, 1, 2, 1}, + uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), + circle::TensorType::TensorType_UINT8, + 1, + 0}, + SliceVariationParam{ + {2, 2, 3, 1}, + {0, 1, 1, 0}, + {1, 1, 2, 1}, + uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), + circle::TensorType::TensorType_FLOAT32, + 0, + 0, + circle::TensorType::TensorType_INT64})); + TEST_P(SliceVariation, Test) { auto &param = GetParam(); @@ -90,32 +116,6 @@ TEST_P(SliceVariation, Test) SUCCEED(); } -INSTANTIATE_TEST_CASE_P( - GenModelTest, SliceVariation, - ::testing::Values( - SliceVariationParam{ - {2, 2, 3, 1}, - {0, 1, 1, 0}, - {1, 1, 2, 1}, - uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})}, - SliceVariationParam{ - {2, 2, 3, 1}, - {0, 1, 1, 0}, - {1, 1, 2, 1}, - uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), - circle::TensorType::TensorType_UINT8, - 1, - 0}, - SliceVariationParam{ - {2, 2, 3, 1}, - {0, 1, 1, 0}, - {1, 1, 2, 1}, - uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}), - circle::TensorType::TensorType_FLOAT32, - 0, - 0, - circle::TensorType::TensorType_INT64})); - TEST_F(GenModelTest, neg_OneOp_Slice_Type) { CircleGen cgen; @@ -136,18 +136,48 @@ TEST_F(GenModelTest, neg_OneOp_Slice_Type) SUCCEED(); } -TEST_F(GenModelTest, neg_OneOp_Slice_DiffType) +TEST_P(SliceVariation, neg_DiffType) { + auto &param = GetParam(); + CircleGen cgen; - int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32}); - std::vector<int32_t> begins_data = {0, 0, 1, 0}; - uint32_t begins_buf = cgen.addBuffer(begins_data); - int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, begins_buf}); - std::vector<int64_t> sizes_data = {1, 2, 1, 1}; - uint32_t sizes_buf = 
cgen.addBuffer(sizes_data); - int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_INT64, sizes_buf}); - int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32}); - cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + + int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point); + int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point); + if (param.begins_type == circle::TensorType::TensorType_INT32) + { + uint32_t begins_buf = cgen.addBuffer(param.begins); + std::vector<int64_t> sizes_64(param.sizes.size()); + for (int i = 0; i < param.begins.size(); i++) + { + sizes_64[i] = param.sizes[i]; + } + + int rank = param.begins.size(); + int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); + + uint32_t sizes_buf = cgen.addBuffer(sizes_64); + int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf}); + + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + } + else if (param.begins_type == circle::TensorType::TensorType_INT64) + { + std::vector<int64_t> begins_64(param.begins.size()); + for (int i = 0; i < param.begins.size(); i++) + { + begins_64[i] = param.begins[i]; + } + + uint32_t begins_buf = cgen.addBuffer(begins_64); + int rank = param.begins.size(); + int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf}); + + uint32_t sizes_buf = cgen.addBuffer(param.sizes); + int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf}); + + cgen.addOperatorSlice({{in, begins, sizes}, {out}}); + } cgen.setInputsAndOutputs({in}, {out}); _context = std::make_unique<GenModelTestContext>(cgen.finish()); diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.cc b/tests/nnfw_api/src/one_op_tests/Softmax.cc index 95debec33..aba4e89a0 100644 --- a/tests/nnfw_api/src/one_op_tests/Softmax.cc +++ b/tests/nnfw_api/src/one_op_tests/Softmax.cc @@ -30,6 +30,23 @@ class SoftmaxVariation : public 
GenModelTest, public ::testing::WithParamInterfa { }; +// Test with different value type +INSTANTIATE_TEST_CASE_P( + GenModelTest, SoftmaxVariation, + ::testing::Values( + // float value + SoftmaxParam{ + uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}}, + {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})}, + // uint8 value + SoftmaxParam{ + uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}), + circle::TensorType::TensorType_UINT8, 1.0, 10}, + // int8 value + SoftmaxParam{ + uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}), + circle::TensorType::TensorType_INT8, 1.0, 0})); + TEST_P(SoftmaxVariation, Test) { auto &param = GetParam(); @@ -95,28 +112,14 @@ TEST_F(GenModelTest, OneOp_Softmax) SUCCEED(); } -// Test with different value type -INSTANTIATE_TEST_CASE_P( - GenModelTest, SoftmaxVariation, - ::testing::Values( - // float value - SoftmaxParam{ - uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}}, - {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})}, - // uint8 value - SoftmaxParam{ - uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}), - circle::TensorType::TensorType_UINT8, 1.0, 10}, - // int8 value - SoftmaxParam{ - uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}), - circle::TensorType::TensorType_INT8, 1.0, 0})); - -TEST_F(GenModelTest, neg_OneOp_Softmax_Type) +TEST_P(SoftmaxVariation, neg_Type) { + auto &param = GetParam(); + CircleGen cgen; - int input = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32}); - int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_INT8}, 1.0, 0); + int input = + cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point); + int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL}); cgen.addOperatorSoftmax({{input}, {out}}, 0.1); 
cgen.setInputsAndOutputs({input}, {out}); diff --git a/tools/release_tool/onert_version.sh b/tools/release_tool/onert_version.sh index 5c875e38b..374a58acf 100755 --- a/tools/release_tool/onert_version.sh +++ b/tools/release_tool/onert_version.sh @@ -27,7 +27,7 @@ show_version() { current_version=${version_line#"Version:"} if [ $nightly -eq 0 ]; then - echo $current_version~$(date "+%y%m%d%H") + echo $current_version~$(date -u "+%y%m%d%H") else echo $current_version fi |