summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ahub/tcchecker-tca/config.yaml2
-rw-r--r--.gitattributes1
-rw-r--r--.github/workflows/check-format.yml64
-rw-r--r--.github/workflows/check-pr-commit.yml51
-rw-r--r--compiler/circle-execution-plan/CMakeLists.txt16
-rw-r--r--compiler/circle-execution-plan/README.md29
-rw-r--r--compiler/circle-execution-plan/requires.cmake4
-rw-r--r--compiler/circle-execution-plan/src/CircleExecutionPlan.cpp99
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.cpp389
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.h130
-rw-r--r--compiler/circle-quantizer/src/CircleQuantizer.cpp44
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst56
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h33
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h70
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h35
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALElu.h33
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h34
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h33
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h32
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALMul.h45
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALNeg.h32
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h37
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h78
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALSub.h35
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/pal.cmake62
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.cpp11
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.test.cpp39
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.cpp1
-rw-r--r--compiler/luci-interpreter/src/loader/CMakeLists.txt2
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.cpp62
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp15
-rw-r--r--compiler/luci-micro/CMakeLists.txt5
-rw-r--r--compiler/luci-micro/standalone/Toolchain.cmake8
-rw-r--r--compiler/luci/CMakeLists.txt4
-rw-r--r--compiler/luci/env/CMakeLists.txt8
-rw-r--r--compiler/luci/export/CMakeLists.txt37
-rw-r--r--compiler/luci/export/src/CircleExporter.test.cpp137
-rw-r--r--compiler/luci/import/CMakeLists.txt11
-rw-r--r--compiler/luci/import/include/luci/Import/CircleReader.h71
-rw-r--r--compiler/luci/import/src/CircleReader.cpp184
-rw-r--r--compiler/luci/import/src/CircleReader.test.cpp67
-rw-r--r--compiler/luci/import/src/Importer.cpp1
-rw-r--r--compiler/luci/import/src/Importer.test.cpp285
-rw-r--r--compiler/luci/lang/CMakeLists.txt8
-rw-r--r--compiler/luci/log/CMakeLists.txt8
-rw-r--r--compiler/luci/log/include/luci/Log.h5
-rw-r--r--compiler/luci/log/src/Log.cpp20
-rw-r--r--compiler/luci/logex/CMakeLists.txt8
-rw-r--r--compiler/luci/partition/CMakeLists.txt8
-rw-r--r--compiler/luci/partition/src/PartitionMerge.cpp3
-rw-r--r--compiler/luci/partition/src/PartitionPGroups.cpp115
-rw-r--r--compiler/luci/pass/CMakeLists.txt10
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h5
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h22
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp22
-rw-r--r--compiler/luci/pass/src/FuseActivationFunctionPass.cpp10
-rw-r--r--compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp77
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp157
-rw-r--r--compiler/luci/plan/CMakeLists.txt19
-rw-r--r--compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp84
-rw-r--r--compiler/luci/profile/CMakeLists.txt8
-rw-r--r--compiler/luci/service/CMakeLists.txt8
-rw-r--r--compiler/one-cmds/one-build44
-rw-r--r--compiler/one-cmds/one-optimize17
-rw-r--r--compiler/one-cmds/one-quantize89
-rw-r--r--compiler/one-cmds/tests/OONE-BUILD_014.cfg2
-rw-r--r--compiler/one-cmds/tests/one-build_014.cfg22
-rw-r--r--compiler/one-cmds/tests/one-build_014.test77
-rw-r--r--compiler/one-cmds/tests/one-build_neg_007.test69
-rw-r--r--compiler/one-cmds/tests/one-build_neg_008.test41
-rw-r--r--compiler/one-cmds/tests/one-build_neg_009.test41
-rw-r--r--compiler/one-cmds/tests/one-quantize_007.test55
-rw-r--r--compiler/one-cmds/tests/one-quantize_008.test55
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_019.test50
-rw-r--r--compiler/one-cmds/tests/onecc_023.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_023.test42
-rw-r--r--compiler/one-cmds/utils.py80
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp103
-rw-r--r--compiler/tflchef/proto/tflchef.proto17
-rw-r--r--compiler/tflchef/tests/signature_def_index/test.recipe60
-rw-r--r--compiler/tflchef/tests/signature_def_name/test.recipe60
-rw-r--r--compiler/tfldump/src/Dump.cpp12
-rw-r--r--compiler/tflite2circle/driver/Driver.cpp5
-rw-r--r--compiler/tflite2circle/include/CircleModel.h12
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp150
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--docs/conf.py2
-rw-r--r--docs/release/1.19/index.rst13
-rw-r--r--docs/release/1.19/release-note-1.19.0.md8
-rw-r--r--infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake4
-rw-r--r--infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake5
-rw-r--r--infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake5
-rw-r--r--infra/cmake/packages/FlatBuffersConfig.cmake5
-rw-r--r--infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake4
-rw-r--r--infra/debian/compiler/changelog7
-rw-r--r--infra/debian/runtime/changelog6
-rw-r--r--infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake66
-rw-r--r--infra/nnfw/cmake/packages/ARMComputeConfig.cmake7
-rw-r--r--packaging/nnfw.spec2
-rw-r--r--res/TensorFlowLiteRecipes/Conv2D_005/test.recipe34
-rw-r--r--res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe42
-rw-r--r--res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe82
-rw-r--r--runtime/contrib/android/api/build.gradle2
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/test/core/compiler/HEScheduler.cc16
109 files changed, 4330 insertions, 238 deletions
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index 86d272d8a..40635d443 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -16,9 +16,7 @@ test:
- /runtime/onert/test/graph/verifier
- /runtime/onert/test/ir
- /runtime/onert/test/util
- - /tests/nnapi/src
- /tests/nnfw_api/src
- - /tests/tools/tflite_run/src
testFile:
- extension: cpp
diff --git a/.gitattributes b/.gitattributes
index b8eec3df8..d36985416 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
tests/nnapi/specs/* linguist-detectable=false
+res/* linguist-detectable=false
diff --git a/.github/workflows/check-format.yml b/.github/workflows/check-format.yml
new file mode 100644
index 000000000..bcbc3c5f8
--- /dev/null
+++ b/.github/workflows/check-format.yml
@@ -0,0 +1,64 @@
+name: Check code format
+
+on:
+ push:
+ branches:
+ - master
+ - release/*
+ pull_request:
+ branches:
+ - master
+ - release/*
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ check-format:
+ name: Check format
+ runs-on: ubuntu-20.04
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+
+ - name: Setup python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.x'
+
+ # C format: clang-format-8
+ # Python format: yapf==0.22.0
+ - name: Install packages
+ run: |
+ sudo apt-get install -y clang-format-8
+ python -m pip install --upgrade pip
+ pip install yapf==0.22.0
+
+ - name: Check
+ run: ./nnas format
+
+ # Upload patch file if failed
+ - name: Store archive
+ uses: actions/upload-artifact@v2
+ if: failure()
+ with:
+ name: format-patch
+ path: format.patch
+ retention-days: 3
+
+ check-copyright:
+ name: Check copyright
+ runs-on: ubuntu-20.04
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ with:
+ # Fetch the full history of all branches (default depth: 1)
+ # The full history is required to determine file creation dates
+ fetch-depth: 0
+
+ - name: Check copyright
+ run: ./nnfw copyright-check
diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml
new file mode 100644
index 000000000..38c76dc18
--- /dev/null
+++ b/.github/workflows/check-pr-commit.yml
@@ -0,0 +1,51 @@
+name: Check PR commit
+
+on:
+ pull_request:
+ branches:
+ - master
+ - release/*
+
+defaults:
+ run:
+ shell: bash
+
+jobs:
+ check-commit-message:
+ name: Check commit message
+ runs-on: ubuntu-20.04
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ with:
+ # Check out the PR head commit
+ # (the checkout action uses the merge commit by default)
+ ref: ${{ github.event.pull_request.head.sha }}
+ # Fetch all history and branch (default: 1)
+ fetch-depth: 0
+
+ - name: Get commit body
+ run: |
+ git log origin/${GITHUB_BASE_REF}..HEAD --format=%b > commit_msg.txt
+ sed '/^$/d' commit_msg.txt > commit_body.txt
+
+ - name: Check signed-off
+ run: |
+ # Count the commit body lines that contain "Signed-off-by:"
+ count=$(cat commit_body.txt | grep 'Signed-off-by:' | wc -l)
+ if [[ ! "$count" -ge "1" ]]; then
+ exit 1
+ fi
+
+ echo "Signed-off-by is OK"
+
+ - name: Check body words
+ # Run this step even if the "Check signed-off" step failed
+ if: ${{ always() }}
+ run: |
+ count=$(cat commit_body.txt | sed '/Signed-off-by:/d' | wc -w)
+ echo "Commit body word check: $count words"
+ if [[ "$count" -lt "5" ]]; then
+ exit 1
+ fi
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
new file mode 100644
index 000000000..115d24860
--- /dev/null
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(SOURCES
+ src/CircleExecutionPlan.cpp
+ src/ExecutionPlanner.cpp
+ src/ExecutionPlanner.h
+ )
+
+add_executable(circle_execution_plan "${SOURCES}")
+target_link_libraries(circle_execution_plan foder)
+target_link_libraries(circle_execution_plan safemain)
+target_link_libraries(circle_execution_plan luci_env)
+target_link_libraries(circle_execution_plan luci_import)
+target_link_libraries(circle_execution_plan luci_export)
+target_link_libraries(circle_execution_plan luci_plan)
+target_link_libraries(circle_execution_plan arser)
+
+install(TARGETS circle_execution_plan DESTINATION bin)
diff --git a/compiler/circle-execution-plan/README.md b/compiler/circle-execution-plan/README.md
new file mode 100644
index 000000000..e789a55db
--- /dev/null
+++ b/compiler/circle-execution-plan/README.md
@@ -0,0 +1,29 @@
+# circle-execution-plan
+
+The _circle-execution-plan_ tool adds an "execution plan" to a model.
+
+This tool takes a circle file as input and produces a modified circle file.
+The output circle file contains plan (`CircleNodeMemoryPlan`) information for every node.
+
+
+An "execution plan" contains:
+- a number that determines the order in which the nodes are executed
+- the memory offsets of the node's output tensors, measured from the beginning of the shared memory buffer
+
+To record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`.
+For this purpose we use `std::map<uint32_t, std::vector<uint32_t>> _memory_plan_table`, which maps the ID of each node to the encoded `CircleNodeMemoryPlan` data of that node.
+
+### Execution plan building
+
+To build an "execution plan" we use the `ExecutionPlanner` class.
+Its main method is `get_execution_plan()`, which finds the "execution plan" of each node and
+writes it to the node's annotations. This is done in two steps:
+- Determining the execution order of the nodes, which is stored in the `_ordered_nodes` vector.
+  Currently there is only one default method for this, `get_default_execution_order_plan()`, which uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`.
+  In the future we can add new methods and choose the most suitable way to traverse the graph.
+
+- Determining the memory offsets of the nodes from the beginning of the shared memory buffer, which are stored in `_offsets`.
+  Currently there is one method for this, `get_offsets_with_greedy_by_size()`, which implements the "Greedy by Size" algorithm described in https://arxiv.org/pdf/2001.03288.pdf.
+  The main objective is to minimize the size of the allocated memory block.
+  In the future, other methods for determining the memory offsets of the nodes
+  may be added here.
diff --git a/compiler/circle-execution-plan/requires.cmake b/compiler/circle-execution-plan/requires.cmake
new file mode 100644
index 000000000..76858f487
--- /dev/null
+++ b/compiler/circle-execution-plan/requires.cmake
@@ -0,0 +1,4 @@
+require(foder)
+require(safemain)
+require(luci)
+require(arser)
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
new file mode 100644
index 000000000..a54100b8c
--- /dev/null
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+#include "ExecutionPlanner.h"
+
+#include <arser/arser.h>
+
+#include <functional>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <cstdlib>
+
+// Command-line entry point of circle_execution_plan.
+//
+// Loads and verifies the input circle model, imports it, lets ExecutionPlanner
+// annotate the graph returned by module->graph() and exports the result to the
+// output path.
+//
+// Returns 0 on success, 255 when argument parsing or export fails and
+// EXIT_FAILURE when the input file cannot be loaded, verified or imported.
+int entry(int argc, char **argv)
+{
+  arser::Arser arser("circle_execution_plan provides model with execution plan meta information");
+
+  arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
+  arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    // Print the parser error followed by the usage text
+    std::cerr << err.what() << std::endl;
+    std::cout << arser;
+    return 255;
+  }
+
+  std::string input_path = arser.get<std::string>("input");
+  std::string output_path = arser.get<std::string>("output");
+
+  foder::FileLoader file_loader{input_path};
+  std::vector<char> model_data;
+
+  try
+  {
+    model_data = file_loader.load();
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cerr << err.what() << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Reject buffers that are not a well-formed circle flatbuffer before touching them
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  const circle::Model *circle_model = circle::GetModel(model_data.data());
+  if (circle_model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Import from input Circle file
+  luci::Importer importer;
+  auto module = importer.importModule(circle_model);
+  // The module is dereferenced right below, so a failed import must stop here
+  if (module.get() == nullptr)
+  {
+    std::cerr << "ERROR: Failed to import '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Do main job
+  luci::ExecutionPlanner execution_planner(module->graph());
+  execution_planner.get_execution_plan();
+
+  // Export to output Circle file
+  luci::CircleExporter exporter;
+  luci::CircleFileExpContract contract(module.get(), output_path);
+
+  if (!exporter.invoke(&contract))
+  {
+    std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+    return 255;
+  }
+
+  return 0;
+}
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
new file mode 100644
index 000000000..c37d1e5f5
--- /dev/null
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecutionPlanner.h"
+#include <loco/IR/Algorithm.h>
+#include <luci/UserSettings.h>
+
+namespace luci
+{
+namespace
+{
+
+constexpr uint32_t nodeNotAssigned = std::numeric_limits<int32_t>::max();
+
+// Returns the output extent of a convolution along one spatial axis.
+//
+// padding       - padding scheme (SAME or VALID)
+// image_size    - input extent along the axis
+// filter_size   - filter extent along the axis
+// stride        - step of the filter along the axis
+// dilation_rate - spacing between filter taps (1 means no dilation)
+//
+// Returns 0 for an unsupported padding value and when the dilated filter is
+// larger than image_size + stride in VALID mode.
+uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size,
+                             uint32_t stride, uint32_t dilation_rate = 1)
+{
+  // Extent covered by the filter once the dilation gaps are taken into account
+  const uint32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  switch (padding)
+  {
+    case Padding::SAME:
+      return (image_size + stride - 1) / stride;
+    case Padding::VALID:
+      // All operands are unsigned: without this guard the subtraction below
+      // would wrap around to a huge value instead of going negative.
+      if (image_size + stride < effective_filter_size)
+      {
+        return 0;
+      }
+      return (image_size + stride - effective_filter_size) / stride;
+    default:
+      assert(false);
+      // Keep the function well-defined when asserts are compiled out
+      return 0;
+  }
+}
+
+// Computes the byte size of the temporary im2col tensor of a Conv2D node.
+// Returns 0 when the node does not need an im2col tensor.
+uint32_t compute_im2col_size(const luci::CircleConv2D *conv)
+{
+  const auto input_node = loco::must_cast<luci::CircleNode *>(conv->input());
+  const auto filter_node = loco::must_cast<luci::CircleNode *>(conv->filter());
+  const auto padding = conv->padding();
+
+  const uint32_t stride_h = conv->stride()->h();
+  const uint32_t stride_w = conv->stride()->w();
+  const uint32_t dilation_h = conv->dilation()->h();
+  const uint32_t dilation_w = conv->dilation()->w();
+  const uint32_t filter_h = filter_node->dim(1).value();
+  const uint32_t filter_w = filter_node->dim(2).value();
+
+  // No im2col for S16 inputs, nor for an undilated 1x1 filter with unit strides
+  const bool dilated = dilation_h != 1 || dilation_w != 1;
+  const bool strided_or_wide = stride_h != 1 || stride_w != 1 || filter_h != 1 || filter_w != 1;
+  if (input_node->dtype() == loco::DataType::S16 || !(dilated || strided_or_wide))
+  {
+    return 0;
+  }
+
+  const uint32_t depth = input_node->dim(3).value();
+  const uint32_t input_h = input_node->dim(1).value();
+  const uint32_t input_w = input_node->dim(2).value();
+
+  const uint32_t output_h = compute_output_size(padding, input_h, filter_h, stride_h, dilation_h);
+  const uint32_t output_w = compute_output_size(padding, input_w, filter_w, stride_w, dilation_w);
+
+  const uint32_t batches = input_node->dim(0).value();
+
+  // One filter-sized patch of input elements per output position
+  return batches * output_h * output_w * depth * filter_h * filter_w * size(input_node->dtype());
+}
+
+} // namespace
+
+// Builds the execution order and the memory offsets, stores both in the
+// annotation of every ordered node and enables the ExecutionPlanGen setting.
+void ExecutionPlanner::get_execution_plan()
+{
+  get_default_execution_order_plan();
+  _required_size = get_offsets_with_greedy_by_size();
+
+  uint32_t order = 0;
+  for (auto *ordered_node : _ordered_nodes)
+  {
+    // The position in _ordered_nodes doubles as the execution order of the node
+    luci::CircleNodeExecutionPlan plan(order, _offsets[order]);
+    luci::add_execution_plan(loco::must_cast<luci::CircleNode *>(ordered_node), plan);
+    ++order;
+  }
+
+  luci::UserSettings::settings()->set(luci::UserSettings::Key::ExecutionPlanGen, true);
+}
+
+// Fills _ordered_nodes with the default execution order.
+void ExecutionPlanner::get_default_execution_order_plan()
+{
+  // The order is the post-order traversal that starts from the graph's output nodes
+  const auto roots = loco::output_nodes(_graph);
+  _ordered_nodes = loco::postorder_traversal(roots);
+}
+
+// Computes the usage interval of every node in _ordered_nodes.
+//
+// _alloc_node[i] receives the position at which node i is first needed and
+// _dealloc_node[i] the position of its last user. Entries that are never
+// released (graph inputs and outputs, nodes without users) keep nodeNotAssigned
+// in _dealloc_node.
+void ExecutionPlanner::get_usage_interval()
+{
+  const size_t node_count = _ordered_nodes.size();
+
+  // Initialize vectors of first and last nodes for usage interval
+  _alloc_node.assign(node_count, nodeNotAssigned);
+  _dealloc_node.assign(node_count, nodeNotAssigned);
+
+  // Number of pending usages per node
+  std::vector<int> usages_counts(node_count, 0);
+
+  // Position of a node in _ordered_nodes, or node_count when the node is not
+  // part of the execution order.
+  auto position_of = [this, node_count](const loco::Node *node) -> size_t {
+    const auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), node);
+    if (it == _ordered_nodes.end())
+    {
+      return node_count;
+    }
+    return static_cast<size_t>(std::distance(_ordered_nodes.begin(), it));
+  };
+
+  auto allocate = [this](uint32_t node, uint32_t tensor) {
+    // Only the first allocation request of a tensor is recorded
+    if (_alloc_node[tensor] != nodeNotAssigned)
+    {
+      return;
+    }
+    assert(_dealloc_node[tensor] == nodeNotAssigned);
+    _alloc_node[tensor] = node;
+  };
+
+  auto deallocate = [this](uint32_t node, uint32_t tensor) {
+    assert(_dealloc_node[tensor] == nodeNotAssigned);
+    _dealloc_node[tensor] = node;
+  };
+
+  // Graph outputs and inputs get one extra usage, so their counters never
+  // drop to zero below and they are never deallocated.
+  for (const auto &output_node : output_nodes(_graph))
+  {
+    const size_t index = position_of(output_node);
+    if (index == node_count)
+    {
+      continue;
+    }
+    usages_counts[index]++;
+  }
+
+  for (const auto &input_node : input_nodes(_graph))
+  {
+    const size_t index = position_of(input_node);
+    // A graph input may be missing from the execution order (presumably when
+    // no output depends on it); indexing with the miss would write out of bounds.
+    if (index == node_count)
+    {
+      continue;
+    }
+    usages_counts[index]++;
+    allocate(0, index);
+  }
+
+  // Increase refcounts of usage for all nodes in _ordered_nodes vector
+  for (size_t i = 0; i < node_count; i++)
+  {
+    for (const auto &prev_node : preds(_ordered_nodes.at(i)))
+    {
+      const size_t index = position_of(prev_node);
+      if (index == node_count)
+      {
+        continue;
+      }
+      usages_counts[index]++;
+    }
+  }
+
+  // Walk the nodes in execution order: allocate each node when it runs and
+  // release a predecessor once its last user has been visited.
+  for (size_t i = 0; i < node_count; i++)
+  {
+    const auto node = _ordered_nodes.at(i);
+    // Constants are allocated from position 0 on; allocate(i, i) is then a no-op
+    if (dynamic_cast<const luci::CircleConst *>(node) != nullptr)
+    {
+      allocate(0, i);
+    }
+    allocate(i, i);
+
+    for (const auto &prev_node : preds(node))
+    {
+      const size_t index = position_of(prev_node);
+      if (index == node_count)
+      {
+        continue;
+      }
+      usages_counts[index]--;
+      if (usages_counts[index] == 0)
+      {
+        deallocate(i, index);
+      }
+    }
+  }
+}
+
+// Computes the usage intervals, runs the greedy-by-size placement and collects
+// the resulting offsets per node in _offsets.
+// Returns the required buffer size reported by greedy_by_size_approach().
+uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
+{
+  get_usage_interval();
+  const auto required_size = greedy_by_size_approach();
+
+  _offsets.resize(_ordered_nodes.size());
+  for (const auto &info : _alloc_node_inform_vector)
+  {
+    auto &node_offsets = _offsets[info.node_num];
+    // The node's own offset goes to the front; offsets of temporaries are appended
+    const auto position = info.is_temp ? node_offsets.end() : node_offsets.begin();
+    node_offsets.insert(position, info.offset);
+  }
+  return required_size;
+}
+
+// Assigns an offset to every entry of _alloc_node_inform_vector, visiting the
+// entries in the order produced by create_alloc_node_inform_vector().
+// Returns the largest end offset (offset + size) that was assigned.
+uint32_t ExecutionPlanner::greedy_by_size_approach()
+{
+  constexpr uint32_t unassigned = std::numeric_limits<uint32_t>::max();
+
+  size_t arena_size = 0;
+  create_alloc_node_inform_vector(false, false, false);
+
+  // Entries that already have an offset, kept sorted by ascending offset
+  std::vector<AllocationNodeInformation> placed;
+
+  for (auto &candidate : _alloc_node_inform_vector)
+  {
+    // Empty entries take no space and never constrain other entries
+    if (candidate.size == 0)
+    {
+      candidate.offset = 0;
+      continue;
+    }
+
+    size_t chosen_offset = unassigned;
+    uint32_t smallest_gap = unassigned;
+    uint32_t cursor = 0;
+
+    for (const auto &other : placed)
+    {
+      const bool disjoint_lifetimes =
+        other.last_node < candidate.first_node || other.first_node > candidate.last_node;
+      if (disjoint_lifetimes)
+      {
+        continue;
+      }
+
+      // Free space between the end of the conflicting entries seen so far and this one
+      const uint32_t gap = other.offset - cursor;
+      const bool fits = cursor + candidate.size <= other.offset;
+      if (fits && gap < smallest_gap)
+      {
+        chosen_offset = cursor;
+        smallest_gap = gap;
+      }
+      cursor = std::max(cursor, other.offset + other.size);
+    }
+
+    // No suitable gap: place the entry behind the last conflicting one
+    if (chosen_offset == unassigned)
+    {
+      chosen_offset = cursor;
+    }
+
+    arena_size = std::max(arena_size, chosen_offset + candidate.size);
+    candidate.offset = chosen_offset;
+
+    placed.insert(std::upper_bound(placed.begin(), placed.end(), candidate), candidate);
+  }
+  return arena_size;
+}
+
+// Fills _alloc_node_inform_vector with one entry per node of _ordered_nodes,
+// plus one temporary entry per Conv2D node that needs an im2col tensor, and
+// sorts it into the order used by the greedy-by-size placement.
+//
+// null_consts - when true, constant nodes get size 0
+// null_inputs - when true, the node at position 0 gets size 0
+// null_im2col - when true, im2col temporaries get size 0
+void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
+                                                       bool null_im2col)
+{
+  // An entry is persistent when it is allocated at position 0 and never released
+  auto is_persistent = [this](uint32_t idx) {
+    return _alloc_node[idx] == 0 && _dealloc_node[idx] == nodeNotAssigned;
+  };
+
+  // Persistent entries come first, ordered by node number. All other entries
+  // are ordered by descending size, then by ascending allocation position.
+  auto node_compare = [this, &is_persistent](const AllocationNodeInformation &alloc_1,
+                                             const AllocationNodeInformation &alloc_2) {
+    const auto idx1 = alloc_1.node_num;
+    const auto idx2 = alloc_2.node_num;
+
+    const bool persistent_1 = is_persistent(idx1);
+    const bool persistent_2 = is_persistent(idx2);
+    if (persistent_1 && persistent_2)
+    {
+      return idx1 < idx2;
+    }
+    if (persistent_1 || persistent_2)
+    {
+      return persistent_1;
+    }
+
+    if (alloc_1.size != alloc_2.size)
+    {
+      return alloc_1.size > alloc_2.size;
+    }
+    return _alloc_node[idx1] < _alloc_node[idx2];
+  };
+
+  _alloc_node_inform_vector.resize(_ordered_nodes.size());
+
+  for (size_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[i]);
+
+    // Byte size of the node output: product of all dimensions times the element
+    // size. Kept unsigned to match the uint32_t operands and the size field.
+    uint32_t node_size = 1;
+    for (uint32_t axis = 0; axis < circle_node->rank(); ++axis)
+    {
+      node_size *= circle_node->dim(axis).value();
+    }
+    node_size *= size(circle_node->dtype());
+
+    _alloc_node_inform_vector[i].node_num = i;
+    _alloc_node_inform_vector[i].first_node = _alloc_node[i];
+    _alloc_node_inform_vector[i].last_node = _dealloc_node[i];
+
+    const auto *const_node = dynamic_cast<const luci::CircleConst *>(circle_node);
+    const bool ignore_size = (i == 0 && null_inputs) || (const_node && null_consts);
+    _alloc_node_inform_vector[i].size = ignore_size ? 0 : node_size;
+
+    // Im2col
+    if (circle_node->opcode() != luci::CircleOpcode::CONV_2D)
+    {
+      continue;
+    }
+
+    auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+    const auto im2col_size = compute_im2col_size(conv);
+    if (im2col_size == 0)
+    {
+      continue;
+    }
+
+    AllocationNodeInformation temp_alloc;
+    temp_alloc.size = null_im2col ? 0 : im2col_size;
+    // The temporary spans the neighbouring positions of its node; position 0
+    // is clamped because i - 1 would wrap around.
+    temp_alloc.first_node = (i > 0) ? i - 1 : 0;
+    temp_alloc.last_node = i + 1;
+    temp_alloc.node_num = i;
+    temp_alloc.is_temp = true;
+
+    _alloc_node_inform_vector.push_back(temp_alloc);
+    _alloc_node.push_back(i);
+    _dealloc_node.push_back(i);
+  }
+  // Sort _alloc_node_inform_vector with node_compare for the greedy by size approach.
+  std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(), node_compare);
+}
+
+// Prints the allocation information of every node in _ordered_nodes together
+// with its breadth, i.e. the summed size of all nodes whose usage interval
+// contains the node's position. Prints the maximum breadth as "Lower bound"
+// and finally sorts _alloc_node_inform_vector by descending breadth.
+void ExecutionPlanner::dump_inform()
+{
+  uint32_t max_breadth = 0;
+
+  // Finds the non-temporary entry that belongs to the given node position
+  auto find_node_inform = [this](uint32_t node_num) {
+    return std::find_if(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+                        [node_num](const AllocationNodeInformation &x) {
+                          return x.node_num == node_num && !x.is_temp;
+                        });
+  };
+
+  for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
+  {
+    auto current_node_it = find_node_inform(i);
+    if (current_node_it == _alloc_node_inform_vector.end())
+    {
+      // Nothing is known about this node; do not dereference the end iterator
+      continue;
+    }
+
+    // Start from zero so that repeated calls do not accumulate
+    current_node_it->breadth = 0;
+    for (uint32_t j = 0; j < _ordered_nodes.size(); j++)
+    {
+      const auto first_node = _alloc_node[j];
+      const auto last_node = _dealloc_node[j];
+      if (i < first_node || i > last_node)
+      {
+        continue;
+      }
+
+      const auto it = find_node_inform(j);
+      if (it != _alloc_node_inform_vector.end())
+      {
+        current_node_it->breadth += it->size;
+      }
+    }
+    if (max_breadth < current_node_it->breadth)
+    {
+      max_breadth = current_node_it->breadth;
+    }
+
+    auto node = loco::must_cast<luci::CircleNode *>(_ordered_nodes.at(i));
+    // Every numeric argument is uint32_t, hence %u
+    printf("node_num = %u node_name = %s node_size = %u node_offset = %u node_breadth = "
+           "%u node_first_node = %u node_last_node = %u\n",
+           i, node->name().c_str(), current_node_it->size, current_node_it->offset,
+           current_node_it->breadth, current_node_it->first_node, current_node_it->last_node);
+  }
+  printf("Lower bound is = %u\n", max_breadth);
+  std::sort(_alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
+            [](const AllocationNodeInformation &first, const AllocationNodeInformation &second) {
+              if (first.breadth != second.breadth)
+                return first.breadth > second.breadth;
+              return first.node_num < second.node_num;
+            });
+}
+
+} // namespace luci
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
new file mode 100644
index 000000000..8e3d9b46a
--- /dev/null
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLANNER_H
+#define CIRCLE_EXECUTION_PLANNER_H
+
+#include <luci/IR/Module.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci
+{
+// Bookkeeping record for one allocation (a node output or a temporary tensor);
+// it is used to build the allocation plan of the nodes.
+struct AllocationNodeInformation
+{
+  // node_num, first_node and last_node start at -1 converted to uint32_t
+  AllocationNodeInformation()
+    : offset(0), size(0), node_num(static_cast<uint32_t>(-1)),
+      first_node(static_cast<uint32_t>(-1)), last_node(static_cast<uint32_t>(-1)), is_temp(false),
+      breadth(0)
+  {
+  }
+
+  // Offset of the allocation from the beginning of the buffer
+  uint32_t offset;
+  // Size required by the allocation
+  uint32_t size;
+  // Number assigned to the node this allocation belongs to
+  uint32_t node_num;
+  // node_num of the node at which this allocation is first used
+  // (start of the usage interval)
+  uint32_t first_node;
+  // node_num of the node at which this allocation is last used
+  // (end of the usage interval)
+  uint32_t last_node;
+  // Whether the allocation is a temporary tensor
+  bool is_temp;
+  // Operation breadth of the node
+  uint32_t breadth;
+
+  // Orders allocations by ascending offset
+  bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; }
+};
+
+// Builds an execution plan (execution order and memory offsets) for the nodes of a graph.
+class ExecutionPlanner
+{
+public:
+  ExecutionPlanner() = delete;
+  explicit ExecutionPlanner(loco::Graph *graph) : _graph(graph) {}
+
+  // Computes the execution plan, i.e. the execution order and the memory
+  // offsets of all nodes in _graph, and writes it into the node annotations
+  // with the help of the CircleNodeExecutionPlan class.
+  void get_execution_plan();
+
+private:
+  // Stores the default execution order in _ordered_nodes.
+  // Other execution orders are possible; this method provides the main one.
+  void get_default_execution_order_plan();
+
+  // Computes the usage interval of every node.
+  void get_usage_interval();
+
+  // Dumps the execution plan information.
+  void dump_inform();
+
+  // Finds the offsets of all nodes in _ordered_nodes with the greedy by size
+  // approach and stores them in _offsets.
+  // Return: required size of the buffer.
+  uint32_t get_offsets_with_greedy_by_size();
+
+  // Implementation of the greedy by size approach that finds the node offsets.
+  uint32_t greedy_by_size_approach();
+
+  // Creates and fills _alloc_node_inform_vector with the usage intervals and the sizes of the nodes.
+  // null_consts = true - the size of const nodes is set to 0;
+  // null_inputs = true - the size of input nodes is set to 0;
+  // null_im2col = true - the size of im2col nodes is set to 0;
+  // The flags are for experiments in which input, const or im2col nodes should
+  // not be taken into account when the offsets and the required buffer size
+  // are determined.
+  void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
+                                       bool null_im2col = false);
+
+  // Additional allocation information for all nodes of _graph.
+  std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
+
+  // Nodes in execution order.
+  std::vector<loco::Node *> _ordered_nodes;
+
+  // Memory offsets of the nodes in the arena buffer.
+  std::vector<std::vector<uint32_t>> _offsets;
+
+  // First use of every node, as a position in _ordered_nodes.
+  // If _alloc_node[i] == j, the node at position i of _ordered_nodes has to be
+  // allocated when the node at position j is reached.
+  std::vector<uint32_t> _alloc_node;
+
+  // Last use of every node, as a position in _ordered_nodes.
+  // If _dealloc_node[i] == j, the node at position i of _ordered_nodes can be
+  // deallocated after the node at position j has used it.
+  std::vector<uint32_t> _dealloc_node;
+
+  loco::Graph *_graph;
+
+  // Required memory size.
+  uint32_t _required_size = 0;
+};
+
+} // namespace luci
+
+#endif // CIRCLE_EXECUTION_PLANNER_H
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 1a09a8a2a..57ac30a87 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -88,24 +88,24 @@ int entry(int argc, char **argv)
.type(arser::DataType::STR_VEC)
.required(false)
.help("Quantize-dequantize weight values required action before quantization. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ "Three arguments required: input_model_dtype(float32) "
+ "output_model_dtype(uint8) granularity(layer, channel)");
arser.add_argument(qwmm)
.nargs(3)
.type(arser::DataType::STR_VEC)
.required(false)
.help("Quantize with min/max values. "
- "Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer, channel)");
+ "Three arguments required: input_model_dtype(float32) "
+ "output_model_dtype(uint8) granularity(layer, channel)");
arser.add_argument(rq)
.nargs(2)
.type(arser::DataType::STR_VEC)
.required(false)
.help("Requantize a quantized model. "
- "Two arguments required: input_dtype(int8) "
- "output_dtype(uint8)");
+ "Two arguments required: input_model_dtype(int8) "
+ "output_model_dtype(uint8)");
arser.add_argument(fq)
.nargs(3)
@@ -116,6 +116,18 @@ int entry(int argc, char **argv)
"Three arguments required: tensor_name(string), "
"scale(float) zero_point(int)");
+ arser.add_argument("--input_type")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Input type of quantized model (uint8 or int16)");
+
+ arser.add_argument("--output_type")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .required(false)
+ .help("Output type of quantized model (uint8 or int16)");
+
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -163,8 +175,8 @@ int entry(int argc, char **argv)
}
options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
- options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
}
@@ -178,9 +190,17 @@ int entry(int argc, char **argv)
}
options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
- options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
+
+ if (arser["--input_type"])
+ options->param(AlgorithmParameters::Quantize_input_type,
+ arser.get<std::string>("--input_type"));
+
+ if (arser["--output_type"])
+ options->param(AlgorithmParameters::Quantize_output_type,
+ arser.get<std::string>("--output_type"));
}
if (arser[rq])
@@ -193,8 +213,8 @@ int entry(int argc, char **argv)
}
options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
- options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
}
if (arser[fq])
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 000000000..771974afe
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,56 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for ArgMax: forwards every argument unchanged to
+// tflite::reference_ops::ArgMinMax. The comparator type is fixed to std::greater<T1>
+// by this signature.
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for BatchToSpaceND: forwards every argument unchanged to
+// tflite::reference_ops::BatchToSpaceND.
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 000000000..0a8ae4e48
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+// Float convolution: forwards to tflite::reference_ops::Conv.
+// im2col_shape and im2col_data are accepted but ignored; an empty RuntimeShape and
+// nullptr are passed to the reference kernel in their place.
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &im2col_shape,
+ float *im2col_data)
+{
+ (void)im2col_shape;
+ (void)im2col_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+// uint8 convolution: forwards to tflite::reference_ops::Conv, including im2col_shape and
+// im2col_data, followed by a trailing nullptr argument.
+// NOTE(review): the trailing nullptr is presumably the reference kernel's unused
+// cpu_backend_context parameter - confirm against the TFLite header.
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
+ uint8 *im2col_data)
+{
+ // NOTE(review): these (void) casts are redundant - both parameters are used below.
+ (void)im2col_shape;
+ (void)im2col_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ im2col_data, nullptr);
+}
+
+// int8 per-channel convolution: forwards to tflite::reference_integer_ops::ConvPerChannel.
+// im2col_shape and im2col_data are accepted but ignored; the reference kernel is called
+// without them.
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &im2col_shape,
+ int8 *im2col_data)
+{
+ (void)im2col_shape;
+ (void)im2col_data;
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for DepthToSpace: forwards every argument unchanged to
+// tflite::reference_ops::DepthToSpace.
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+// PAL wrapper for float Elu: forwards every argument unchanged to
+// tflite::reference_ops::Elu.
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for L2Normalize: forwards every argument unchanged to
+// tflite::reference_ops::L2Normalization.
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for L2Pool2D: forwards every argument unchanged to
+// tflite::reference_ops::L2Pool.
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for float LeakyRelu: forwards every argument unchanged to
+// tflite::reference_ops::LeakyRelu.
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 000000000..2b46b100c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for float Mul.
+// Note that this calls tflite::reference_ops::BroadcastMul4DSlow rather than a
+// non-broadcasting multiply kernel, so it performs the same call as the
+// BroadcastMul4DSlow wrapper in this header.
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const float *input1_data, const tflite::RuntimeShape &input2_shape,
+ const float *input2_data, const tflite::RuntimeShape &output_shape,
+ float *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+// PAL wrapper for broadcasting float Mul: forwards every argument unchanged to
+// tflite::reference_ops::BroadcastMul4DSlow.
+static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape,
+ const float *input1_data,
+ const tflite::RuntimeShape &input2_shape,
+ const float *input2_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for Neg: forwards every argument unchanged to
+// tflite::reference_ops::Negate.
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for ResizeBilinear: forwards every argument unchanged to
+// tflite::reference_ops::ResizeBilinear.
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for ResizeNearestNeighbor: forwards every argument unchanged to
+// tflite::reference_ops::ResizeNearestNeighbor.
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
new file mode 100644
index 000000000..6bbda4867
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+// Intentionally a no-op in this PAL: all three arguments are ignored and `data` is left
+// untouched.
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+// Fills the quantized-softmax fields of `params` from `input_scale` and `beta`:
+// input_multiplier and input_left_shift come from tflite::PreprocessSoftmaxScaling,
+// diff_min is the negated result of tflite::CalculateInputRadius.
+// Both helpers are called with kScaledDiffIntegerBits = 5.
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+// Generic Softmax fallback for element types without a dedicated specialization.
+// Not implemented: it trips an assertion, and when assertions are compiled out (NDEBUG)
+// it returns without writing to output_data.
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation is not supported on mcu
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+// int8 Softmax specialization backed by CMSIS-NN's arm_softmax_s8.
+// outer_size and depth are derived from the input/output shapes relative to the trailing
+// dimension; the multiplier, shift and diff_min are read from `params`.
+template <>
+inline void Softmax<int8_t>(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ // Softmax is applied along the last dimension.
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int32_t mult = params.input_multiplier;
+ const int32_t shift = params.input_left_shift;
+ const int32_t diff_min = params.diff_min;
+
+ arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for SpaceToBatchND: forwards every argument unchanged to
+// tflite::reference_ops::SpaceToBatchND.
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+// PAL wrapper for SpaceToDepth: forwards every argument unchanged to
+// tflite::reference_ops::SpaceToDepth.
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+/// PAL entry point for Sub: hands every argument, unchanged, to the
+/// tflite::reference_ops implementation.
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+                       const tflite::RuntimeShape &input1_shape, const T *input1_data,
+                       const tflite::RuntimeShape &input2_shape, const T *input2_data,
+                       const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  namespace reference = tflite::reference_ops;
+
+  reference::Sub(params, input1_shape, input1_data, input2_shape, input2_data, output_shape,
+                 output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 000000000..9a25a3c5d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,62 @@
+# initialize_pal: look up every source package this PAL depends on.
+# Sets PAL_INITIALIZED to TRUE only when all five packages were found; otherwise prints a
+# STATUS message naming the first missing package and returns early.
+# Because this is a macro, return() leaves the scope that invoked initialize_pal,
+# not just the macro body.
+macro(initialize_pal)
+ # QUIET lookups; the *_FOUND variables are checked one by one below
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+ nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ # Reached only when none of the checks above returned
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+# add_pal_to_target(TGT): give target TGT the PAL include paths and link it against the
+# static helper library luci_interpreter_cmsisnn_pal, which is created here.
+# Relies on the *_DIR variables of the source packages looked up by initialize_pal.
+# NOTE(review): the library target and the CMSIS/NN subdirectory are added inside the macro
+# body, so this presumably must be invoked only once per configure run - confirm with callers.
+macro(add_pal_to_target TGT)
+ # NOTE(review): PAL is not set anywhere in this file; presumably provided by the includer
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ # Helper library built from the single TensorFlow Lite source file listed in PAL_SOURCES
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ # Pull in the CMSIS-NN build tree and expose the CMSIS NN/DSP/Core headers to the helper library
+ add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${CMSISSource_DIR}/CMSIS/NN/Include"
+ "${CMSISSource_DIR}/CMSIS/DSP/Include"
+ "${CMSISSource_DIR}/CMSIS/Core/Include")
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index fb5e063a9..5647f4c44 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -129,6 +129,17 @@ void Conv2D::configure()
auto im2col = getOutputTensors()[1];
im2col->set_allocatable(false);
}
+
+ switch (_params.activation)
+ {
+ case Activation::NONE:
+ case Activation::RELU:
+ case Activation::RELU6:
+ case Activation::RELU_N1_TO_1:
+ break;
+ default:
+ throw std::runtime_error("Unsupported fused activation");
+ }
}
void Conv2D::execute() const
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
index 277c280f5..0fe6ef795 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -663,6 +663,45 @@ TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+// Negative test: configuring a float Conv2D kernel whose fused activation is TANH must throw.
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+  // Kernel parameters; the TANH activation is the value under test
+  Conv2DParams params{};
+  params.activation = Activation::TANH;
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+
+  // Input
+  Shape input_shape{1, 4, 3, 2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+
+  // Filter
+  Shape filter_shape{2, 2, 2, 2};
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+
+  // Bias
+  Shape bias_shape{2};
+  std::vector<float> bias_data{1, 2};
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+
+  // Scratch and output tensors
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
index 6e83e37f6..586cfa1e1 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.cpp
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -32,7 +32,6 @@ void calculateActivationRange(Activation activation, float *activation_min, floa
switch (activation)
{
case Activation::NONE:
- case Activation::TANH:
*activation_min = std::numeric_limits<float>::lowest();
*activation_max = std::numeric_limits<float>::max();
break;
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
index 974283a2f..2cde99f5d 100644
--- a/compiler/luci-interpreter/src/loader/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -23,7 +23,7 @@ target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER
target_link_libraries(${LUCI_INTERPRETER_LOADER}
PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
- PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common)
+ PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
if(NOT ENABLE_TEST)
return()
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index b55e7c504..a14442ed5 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -18,6 +18,7 @@
#include "loader/KernelBuilder.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <loco/IR/Algorithm.h>
namespace luci_interpreter
@@ -155,6 +156,15 @@ void GraphLoader::loadTensors()
auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
node->name());
+ // If node has execution plan then read memory offsets for nodes
+ // from the beginning of shared memory buffer. Used in Static Memory Manager.
+ if (luci::has_execution_plan(node))
+ {
+ auto execution_plan = luci::get_execution_plan(node);
+ assert(!execution_plan.offsets().empty());
+ tensor->set_offset(execution_plan.offsets().front());
+ }
+
if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
{
size_t data_size{};
@@ -199,16 +209,54 @@ void GraphLoader::loadOperators()
KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
// Create kernels for executable nodes. This has to be done in execution order.
- for (const loco::Node *loco_node :
- loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph))))
+ auto graph = const_cast<loco::Graph *>(_graph);
+
+ auto const graph_nodes = loco::all_nodes(graph);
+
+ // Checking for execution plan in node annotations.
+ bool has_execution_annotation = true;
+ auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (!luci::has_execution_plan(circle_node))
+ has_execution_annotation = false;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
+
+ if (has_execution_annotation)
{
- const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+ // Build ordered_nodes vector that stores the order of execution of graph nodes.
+ std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
- if (isExecutableNode(node))
+ auto const filler = [&ordered_nodes](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ auto const position = luci::get_execution_plan(circle_node).order_in_plan();
+ ordered_nodes.at(position) = circle_node;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), filler);
+
+ for (auto node : ordered_nodes)
{
- std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
- _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
- _runtime_graph->addKernel(std::move(kernel));
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+ else
+ {
+ // If it is impossible to build the execution order plan,
+ // then we use the default postorder_traversal approach.
+ for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
}
}
}
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
index 71c8ef3e4..22fd1aca4 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -17,6 +17,7 @@
#include "Builders.h"
#include "kernels/Conv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
namespace luci_interpreter
{
@@ -31,13 +32,25 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
const Tensor *input = helper.getInputTensor(node->input());
const Tensor *filter = helper.getInputTensor(node->filter());
- const Tensor *bias = helper.getInputTensor(node->bias());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
Tensor *output = helper.getOutputTensor(node);
auto im2col =
std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
im2col->set_observable(false);
im2col->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for im2col temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in im2col.
+ im2col->set_offset(execution_plan.offsets().at(1));
+ }
Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
Conv2DParams params{};
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
index d936e12ba..94347082c 100644
--- a/compiler/luci-micro/CMakeLists.txt
+++ b/compiler/luci-micro/CMakeLists.txt
@@ -13,11 +13,12 @@ endif()
set(CMAKE_ARM_OPTIONS
-DLUCI_INTERPRETER_STATIC=ON
-DLUCI_STATIC=ON
- "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/standalone/Toolchain.cmake"
+ -DBUILD_CMSIS_NN_FUNCTIONS=ON
+ -DTARGET_CPU=cortex-m7
+ "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake"
"-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
"-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
"-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
- -DCPU_ARCH=arm
-DC_COMPILER=${ARM_C_COMPILER}
-DCXX_COMPILER=${ARM_CXX_COMPILER}
-DASM_COMPILER=${ARM_ASM_COMPILER}
diff --git a/compiler/luci-micro/standalone/Toolchain.cmake b/compiler/luci-micro/standalone/Toolchain.cmake
deleted file mode 100644
index 2d23b5de5..000000000
--- a/compiler/luci-micro/standalone/Toolchain.cmake
+++ /dev/null
@@ -1,8 +0,0 @@
-set(CMAKE_SYSTEM_NAME Generic)
-
-set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
-set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
-set(CMAKE_C_COMPILER "${C_COMPILER}")
-set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
-set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
-set(CMAKE_OBJCOPY "${OBJCOPY}")
diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt
index 9dcf1b55d..b92eefb40 100644
--- a/compiler/luci/CMakeLists.txt
+++ b/compiler/luci/CMakeLists.txt
@@ -4,9 +4,9 @@
#
# Currently this feature is used for luci-interpreter MCU builds.
if (STATIC_LUCI)
- set(LIBRARY_TYPE "STATIC")
+ set(LUCI_LIBRARY_TYPE "STATIC")
else()
- set(LIBRARY_TYPE "SHARED")
+ set(LUCI_LIBRARY_TYPE "SHARED")
endif()
add_subdirectory(env)
diff --git a/compiler/luci/env/CMakeLists.txt b/compiler/luci/env/CMakeLists.txt
index bba515551..7025db2e8 100644
--- a/compiler/luci/env/CMakeLists.txt
+++ b/compiler/luci/env/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_env ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_env ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_env PUBLIC include)
target_link_libraries(luci_env PRIVATE nncc_common)
install(TARGETS luci_env DESTINATION lib)
diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt
index 2b41a6248..a267d0e1f 100644
--- a/compiler/luci/export/CMakeLists.txt
+++ b/compiler/luci/export/CMakeLists.txt
@@ -1,13 +1,12 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
-# TODO enable tests
-#file(GLOB_RECURSE TESTS "src/*.test.cpp")
-#list(REMOVE_ITEM SOURCES ${TESTS})
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_export ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_export ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_export PRIVATE src)
target_include_directories(luci_export PUBLIC include)
target_link_libraries(luci_export PRIVATE luci_lang)
@@ -26,13 +25,17 @@ install(TARGETS luci_export DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
-#if(NOT ENABLE_TEST)
-# return()
-#endif(NOT ENABLE_TEST)
-#
-#nnas_find_package(GTest REQUIRED)
-#
-#GTest_AddTest(luci_export_test ${TESTS})
-#target_include_directories(luci_export_test PRIVATE src)
-#target_link_libraries(luci_export_test luci_export)
-#target_link_libraries(luci_export_test oops)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_export_test ${TESTS})
+target_include_directories(luci_export_test PRIVATE src)
+target_link_libraries(luci_export_test luci_export)
+target_link_libraries(luci_export_test luci_plan)
+target_link_libraries(luci_export_test luci_lang)
+target_link_libraries(luci_export_test mio_circle)
+target_link_libraries(luci_export_test luci_env)
+target_link_libraries(luci_export_test oops)
diff --git a/compiler/luci/export/src/CircleExporter.test.cpp b/compiler/luci/export/src/CircleExporter.test.cpp
new file mode 100644
index 000000000..5898f9d65
--- /dev/null
+++ b/compiler/luci/export/src/CircleExporter.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleExporter.h"
+
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+#include <luci/IR/Nodes/CircleRelu.h>
+#include <luci/UserSettings.h>
+
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
+
+#include <gtest/gtest.h>
+
+/**
+ * @brief Exporter contract that owns a small Input -> Relu -> Output graph and keeps the
+ *        bytes handed to store() in an in-memory buffer
+ */
+class SampleGraphContract : public luci::CircleExporter::Contract
+{
+public:
+  SampleGraphContract()
+    : luci::CircleExporter::Contract(), _buffer(std::make_unique<std::vector<char>>())
+  {
+    _g = loco::make_graph();
+
+    // Graph-level input/output descriptors
+    auto graph_input = _g->inputs()->create();
+    auto graph_output = _g->outputs()->create();
+
+    // Nodes, created in the same order as before: input, output, relu
+    input_node = _g->nodes()->create<luci::CircleInput>();
+    output_node = _g->nodes()->create<luci::CircleOutput>();
+    relu_node = _g->nodes()->create<luci::CircleRelu>();
+
+    // Input node and its graph-level descriptor
+    input_node->index(graph_input->index());
+    input_node->name("input");
+    input_node->dtype(loco::DataType::FLOAT32);
+    graph_input->shape({1, 2, 3, 4});
+    graph_input->dtype(loco::DataType::FLOAT32);
+
+    // Relu consumes the input
+    relu_node->features(input_node);
+    relu_node->name("relu");
+
+    // Output node and its graph-level descriptor
+    output_node->from(relu_node);
+    output_node->index(graph_output->index());
+    output_node->name("output");
+    graph_output->shape({1, 2, 3, 4});
+    graph_output->dtype(loco::DataType::FLOAT32);
+  }
+
+  loco::Graph *graph(void) const override { return _g.get(); }
+
+public:
+  // Replaces the buffer contents with the size bytes starting at ptr; always reports success
+  bool store(const char *ptr, const size_t size) const override
+  {
+    _buffer->assign(ptr, ptr + size);
+    return true;
+  }
+
+  const std::vector<char> &get_buffer() { return *_buffer; }
+
+public:
+  luci::CircleInput *input_node;
+  luci::CircleOutput *output_node;
+  luci::CircleRelu *relu_node;
+
+private:
+  std::unique_ptr<loco::Graph> _g;
+  std::unique_ptr<std::vector<char>> _buffer;
+};
+
+// Attaches an execution plan (order 1, one memory offset of 100) to the relu node, exports the
+// graph with ExecutionPlanGen enabled and checks the raw contents of the exported metadata table.
+TEST(CircleExport, export_execution_plan)
+{
+  SampleGraphContract contract;
+  uint32_t reference_order = 1;
+  uint32_t reference_offset = 100u;
+  luci::add_execution_plan(contract.relu_node,
+                           luci::CircleNodeExecutionPlan(reference_order, {reference_offset}));
+
+  luci::UserSettings::settings()->set(luci::UserSettings::ExecutionPlanGen, true);
+  luci::CircleExporter exporter;
+
+  exporter.invoke(&contract);
+
+  ASSERT_FALSE(contract.get_buffer().empty());
+  std::unique_ptr<circle::ModelT> model(circle::GetModel(contract.get_buffer().data())->UnPack());
+  ASSERT_NE(model.get(), nullptr);
+  // Fail the test (instead of indexing out of bounds) when no metadata entry was exported
+  ASSERT_FALSE(model->metadata.empty());
+  ASSERT_EQ(model->metadata[0]->name, "ONE_execution_plan_table");
+  auto metadata_buffer = model->metadata[0]->buffer;
+  // Same for the buffer index stored in the metadata entry
+  ASSERT_LT(metadata_buffer, model->buffers.size());
+  auto &buffer = model->buffers[metadata_buffer]->data;
+  // Five uint32_t words are read below: entry count, node id, plan size, order, offset
+  ASSERT_EQ(buffer.size(), 20);
+  uint32_t *raw_table_contents = reinterpret_cast<uint32_t *>(buffer.data());
+
+  auto num_entries = raw_table_contents[0];
+  ASSERT_EQ(num_entries, 1);
+  auto node_id = raw_table_contents[1];
+  ASSERT_EQ(node_id, 1); // relu node is second (aka id 1) in topological sort in exporter
+  auto node_plan_size = raw_table_contents[2];
+  ASSERT_EQ(node_plan_size, 2); // 1 for execution order, 1 for memory offset value
+  auto node_plan_order = raw_table_contents[3];
+  ASSERT_EQ(node_plan_order,
+            reference_order); // this value goes from CircleNodeExecutionPlan initialization
+  auto node_plan_offset = raw_table_contents[4];
+  ASSERT_EQ(node_plan_offset,
+            reference_offset); // this value goes from CircleNodeExecutionPlan initialization
+}
+
+// Negative test: a node carries an execution plan, but ExecutionPlanGen is switched off, so the
+// exported model is expected to contain no metadata entries.
+TEST(CircleExport, export_execution_plan_nosetting_NEG)
+{
+  const uint32_t plan_order = 1;
+  const uint32_t plan_offset = 100u;
+
+  SampleGraphContract contract;
+  luci::add_execution_plan(contract.relu_node,
+                           luci::CircleNodeExecutionPlan(plan_order, {plan_offset}));
+
+  auto *settings = luci::UserSettings::settings();
+  settings->set(luci::UserSettings::ExecutionPlanGen, false);
+
+  luci::CircleExporter exporter;
+  exporter.invoke(&contract);
+
+  ASSERT_FALSE(contract.get_buffer().empty());
+
+  const auto *packed_model = circle::GetModel(contract.get_buffer().data());
+  std::unique_ptr<circle::ModelT> model(packed_model->UnPack());
+  ASSERT_NE(model.get(), nullptr);
+  ASSERT_EQ(model->metadata.size(), 0);
+}
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 1df569d11..6630cab9f 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_import ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_import ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_import PRIVATE src)
target_include_directories(luci_import PUBLIC include)
target_link_libraries(luci_import PUBLIC luci_lang)
@@ -33,3 +33,6 @@ GTest_AddTest(luci_import_test ${TESTS})
target_include_directories(luci_import_test PRIVATE src)
target_link_libraries(luci_import_test luci_import)
target_link_libraries(luci_import_test oops)
+target_link_libraries(luci_import_test luci_plan)
+target_link_libraries(luci_import_test luci_lang)
+target_link_libraries(luci_import_test mio_circle)
diff --git a/compiler/luci/import/include/luci/Import/CircleReader.h b/compiler/luci/import/include/luci/Import/CircleReader.h
index b9697fb86..fb38ba90b 100644
--- a/compiler/luci/import/include/luci/Import/CircleReader.h
+++ b/compiler/luci/import/include/luci/Import/CircleReader.h
@@ -36,10 +36,19 @@ namespace luci
{
bool is_valid(const circle::OperatorCodeT &opcode);
+bool is_valid(const circle::OperatorCode *opcode);
+
bool is_custom(const circle::OperatorCodeT &opcode);
+bool is_custom(const circle::OperatorCode *opcode);
+
std::string opcode_name(const circle::OperatorCodeT &opcode);
+std::string opcode_name(const circle::OperatorCode *opcode);
+
const char *tensor_name(const circle::TensorT &tensor);
+const char *tensor_name(const circle::Tensor *tensor);
+
const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor);
+const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor);
loco::DataType luci_datatype(circle::TensorType type);
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type);
@@ -49,29 +58,70 @@ luci::CircleFullyConnected::WeightsFormat
luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format);
std::unique_ptr<CircleQuantParam>
luci_quantparam(const circle::QuantizationParametersT *quantization);
+std::unique_ptr<CircleQuantParam>
+luci_quantparam(const circle::QuantizationParameters *quantization);
/// @brief Copy common tensor attributes such as name, type, etc. to node.
void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node);
+void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node);
+
+/**
+ * @brief Read-only wrapper that lets a flatbuffers::Vector pointer be used like a std::vector
+ *
+ * @note Only the interface is declared here; member definitions are not part of this
+ *       declaration.
+ */
+template <typename T> class VectorWrapper
+{
+public:
+  using iterator = typename flatbuffers::Vector<T>::const_iterator;
+  using value_type = typename flatbuffers::Vector<T>::return_type;
+
+public:
+  explicit VectorWrapper(const flatbuffers::Vector<T> *ptr);
+
+  // State queries
+  bool null() const;
+  bool empty() const;
+  uint32_t size() const;
+
+  // Raw storage and iteration
+  const T *data() const;
+  iterator begin() const;
+  iterator end() const;
+
+  // Indexed element access
+  value_type at(uint32_t i) const;
+  value_type operator[](uint32_t i) const;
+
+private:
+  const flatbuffers::Vector<T> *_vector;
+};
+
+/// Convenience factory: builds a VectorWrapper around vec so that T is deduced from the argument.
+template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
+{
+  VectorWrapper<T> wrapped(vec);
+  return wrapped;
+}
/**
* @brief Loads Circle file and provides helpers to access attributes
*/
class CircleReader
{
-private:
+private: // unpack API
using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>;
using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>;
+private: // direct API
+ using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
+ using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
+ using CircleOperators = VectorWrapper<flatbuffers::Offset<circle::Operator>>;
+ using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
+ using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
+
using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
public:
CircleReader() = default;
-public:
+public: // unpack API
const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; }
const CircleBuffers_t &buffers() const { return _model->buffers; }
const CircleTensors_t &tensors() const { return _current_subgraph->tensors; }
@@ -89,6 +139,20 @@ public:
circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const;
std::string opcode_name(const circle::OperatorT &op) const;
+public: // direct API
+ CircleOperatorCodes native_opcodes() const { return wrap(_native_model->operator_codes()); }
+ CircleBuffers native_buffers() const { return wrap(_native_model->buffers()); }
+ CircleTensors native_tensors() const { return wrap(_native_subgraph->tensors()); }
+ CircleOperators native_operators() const { return wrap(_native_subgraph->operators()); }
+ VectorWrapper<int32_t> native_inputs() const { return wrap(_native_subgraph->inputs()); }
+ VectorWrapper<int32_t> native_outputs() const { return wrap(_native_subgraph->outputs()); }
+ std::string native_name() const { return _native_subgraph->name()->str(); }
+ circle::DataFormat native_data_format() const { return _native_subgraph->data_format(); }
+ CircleMetadataSet native_metadata() const { return wrap(_native_model->metadata()); }
+
+ circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
+ std::string opcode_name(const circle::Operator *op) const;
+
public:
bool parse(const circle::Model *model);
bool select_subgraph(uint32_t subgraph);
@@ -97,8 +161,9 @@ private:
std::unique_ptr<const circle::ModelT> _model;
const circle::SubGraphT *_current_subgraph{nullptr};
- const circle::Model *_model_ptr{nullptr};
+ const circle::Model *_native_model{nullptr};
const CircleTensorsPtr_t *_tensors_ptr{nullptr};
+ const circle::SubGraph *_native_subgraph{nullptr};
};
} // namespace luci
diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp
index 6c9bf3af9..14917ba06 100644
--- a/compiler/luci/import/src/CircleReader.cpp
+++ b/compiler/luci/import/src/CircleReader.cpp
@@ -29,12 +29,26 @@ bool is_valid(const circle::OperatorCodeT &opcode)
return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
}
+/// Direct-API overload: true when the builtin code of opcode lies within
+/// [BuiltinOperator_MIN, BuiltinOperator_MAX].
+bool is_valid(const circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+
+  const circle::BuiltinOperator code = opcode->builtin_code();
+  const bool below_min = code < circle::BuiltinOperator_MIN;
+  const bool above_max = circle::BuiltinOperator_MAX < code;
+  return !(below_min || above_max);
+}
+
bool is_custom(const circle::OperatorCodeT &opcode)
{
circle::BuiltinOperator code = opcode.builtin_code;
return (code == circle::BuiltinOperator_CUSTOM);
}
+/// Direct-API overload: true when opcode carries BuiltinOperator_CUSTOM.
+bool is_custom(const circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+
+  const bool custom = (opcode->builtin_code() == circle::BuiltinOperator_CUSTOM);
+  return custom;
+}
+
std::string opcode_name(const circle::OperatorCodeT &opcode)
{
if (!is_valid(opcode))
@@ -56,6 +70,30 @@ std::string opcode_name(const circle::OperatorCodeT &opcode)
return circle::EnumNameBuiltinOperator(code);
}
+/**
+ * @brief Human-readable name of an operator code (direct API overload)
+ *
+ * @return "(invalid)" when the builtin code is out of range; for custom operators the custom
+ *         code string, or "(invalid custom)" when that string is missing or empty; otherwise
+ *         the enum name of the builtin operator
+ */
+std::string opcode_name(const circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid)";
+    return oss.str();
+  }
+
+  if (is_custom(opcode))
+  {
+    // custom_code is an optional flatbuffers field: the accessor yields nullptr when the
+    // field is absent, so it must be checked before it is dereferenced
+    const auto *custom_code = opcode->custom_code();
+    if (custom_code == nullptr || custom_code->size() == 0)
+      return "(invalid custom)";
+
+    return custom_code->str();
+  }
+
+  circle::BuiltinOperator code = opcode->builtin_code();
+  return circle::EnumNameBuiltinOperator(code);
+}
+
const char *tensor_name(const circle::TensorT &tensor)
{
static const char *kEmptyTensorName = "(noname)";
@@ -66,11 +104,30 @@ const char *tensor_name(const circle::TensorT &tensor)
return kEmptyTensorName;
}
+/**
+ * @brief Name of a tensor (direct API overload)
+ *
+ * @return the tensor's name, or "(noname)" when the name field is missing or empty
+ */
+const char *tensor_name(const circle::Tensor *tensor)
+{
+  assert(tensor != nullptr);
+
+  static const char *kEmptyTensorName = "(noname)";
+
+  // name is an optional flatbuffers field: the accessor yields nullptr when the field is
+  // absent, so it must be checked before c_str() is called on it
+  const auto *name = tensor->name();
+  if (name == nullptr)
+    return kEmptyTensorName;
+
+  const auto tensor_name = name->c_str();
+  if (!std::string(tensor_name).empty())
+    return tensor_name;
+
+  return kEmptyTensorName;
+}
+
const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor)
{
return tensor.quantization.get();
}
+/// Direct-API overload: returns whatever tensor->quantization() yields.
+const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor)
+{
+  assert(tensor != nullptr);
+
+  const auto *quantization = tensor->quantization();
+  return quantization;
+}
+
loco::DataType luci_datatype(const circle::TensorType type)
{
switch (type)
@@ -235,6 +292,16 @@ luci_quantparam(const circle::QuantizationParametersT *quantization)
return nullptr;
}
+/// Direct-API overload: unpacks qparams into a temporary object-API struct and delegates to
+/// the overload taking circle::QuantizationParametersT.
+std::unique_ptr<CircleQuantParam> luci_quantparam(const circle::QuantizationParameters *qparams)
+{
+  assert(qparams != nullptr);
+
+  circle::QuantizationParametersT unpacked;
+  qparams->UnPackTo(&unpacked);
+
+  const circle::QuantizationParametersT *unpacked_ptr = &unpacked;
+  return luci_quantparam(unpacked_ptr);
+}
+
std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParametersT *sparsity)
{
assert(sparsity);
@@ -257,6 +324,16 @@ std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParamete
return sparsityparam;
}
+/// Direct-API overload: unpacks sparparam into a temporary object-API struct and delegates to
+/// the overload taking circle::SparsityParametersT.
+std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParameters *sparparam)
+{
+  assert(sparparam != nullptr);
+
+  circle::SparsityParametersT unpacked;
+  sparparam->UnPackTo(&unpacked);
+
+  const circle::SparsityParametersT *unpacked_ptr = &unpacked;
+  return luci_sparsityparam(unpacked_ptr);
+}
+
void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
{
node->name(tensor_name(tensor));
@@ -292,6 +369,45 @@ void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
}
}
+// Copies name, dtype, shape, quantization and sparsity from a packed
+// flatbuffer tensor onto 'node'. A dimension whose shape_signature entry is -1
+// is left unset on the node; every other dimension takes the value from shape.
+void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
+{
+  assert(tensor != nullptr);
+
+  node->name(tensor_name(tensor));
+  node->dtype(luci_datatype(tensor->type()));
+
+  const auto signature = wrap(tensor->shape_signature());
+  const auto shape = wrap(tensor->shape()); // in NHWC
+  // the signature is either missing or describes exactly the same rank
+  assert(signature.size() == 0 || signature.size() == shape.size());
+
+  node->rank(shape.size());
+  for (uint32_t axis = 0; axis < shape.size(); ++axis)
+  {
+    const bool unknown_dim = signature.size() > 0 && signature.at(axis) == -1;
+    if (unknown_dim)
+      node->dim(axis).unset();
+    else
+      node->dim(axis).set(shape[axis]);
+  }
+
+  // quantization parameters are attached only when present and convertible
+  if (const auto quantization = tensor->quantization())
+  {
+    auto quantparam = luci_quantparam(quantization);
+    if (quantparam)
+      node->quantparam(std::move(quantparam));
+  }
+
+  // same for sparsity parameters
+  if (const auto sparsity = tensor->sparsity())
+  {
+    auto sparsityparam = luci_sparsityparam(sparsity);
+    if (sparsityparam)
+      node->sparsityparam(std::move(sparsityparam));
+  }
+}
+
circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const
{
const auto &op_codes = opcodes();
@@ -326,7 +442,7 @@ bool CircleReader::parse(const circle::Model *model)
_model.reset(model->UnPack());
// for direct pointer access
- _model_ptr = model;
+ _native_model = model;
return true;
}
@@ -342,12 +458,72 @@ bool CircleReader::select_subgraph(uint32_t sgindex)
_current_subgraph = _model->subgraphs[sgindex].get();
// for direct pointer access
- auto subgraphs = _model_ptr->subgraphs();
- const circle::SubGraph *subgraph = (*subgraphs)[sgindex];
+ auto subgraphs = _native_model->subgraphs();
+ assert(subgraphs != nullptr);
+
+ _native_subgraph = subgraphs->Get(sgindex);
+ assert(_native_subgraph != nullptr);
- _tensors_ptr = subgraph->tensors();
+ _tensors_ptr = _native_subgraph->tensors();
return true;
}
+// Out-of-class definitions of VectorWrapper<T>: a view over a
+// flatbuffers::Vector<T> pointer that tolerates a null pointer by behaving
+// like an empty vector.
+template <typename T>
+VectorWrapper<T>::VectorWrapper(const flatbuffers::Vector<T> *ptr) : _vector(ptr)
+{
+  // Do nothing
+}
+
+// Number of elements; 0 when the wrapped pointer is null.
+template <typename T> uint32_t VectorWrapper<T>::size() const
+{
+  if (null())
+    return 0;
+  return _vector->size();
+}
+
+// Pointer to the first element; nullptr when the wrapped pointer is null.
+template <typename T> const T *VectorWrapper<T>::data() const
+{
+  if (null())
+    return nullptr;
+  return _vector->data();
+}
+
+// Iterator to the first element; a (nullptr, 0) iterator when null.
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::begin() const
+{
+  if (null())
+    return iterator(nullptr, 0);
+  return _vector->begin();
+}
+
+// Past-the-end iterator; equal to begin() when null so the range is empty.
+template <typename T> typename VectorWrapper<T>::iterator VectorWrapper<T>::end() const
+{
+  if (null())
+    return begin();
+  return _vector->end();
+}
+
+// Bounds-checked element access; throws std::range_error when i >= size().
+template <typename T> typename VectorWrapper<T>::value_type VectorWrapper<T>::at(uint32_t i) const
+{
+  if (i < size())
+    return _vector->Get(i);
+
+  // TODO find better error message
+  throw std::range_error("Access to prohibited vector element");
+}
+
+// Same bounds-checked access as at().
+template <typename T>
+typename VectorWrapper<T>::value_type VectorWrapper<T>::operator[](uint32_t i) const
+{
+  return at(i);
+}
+
+// True when no vector is wrapped at all.
+template <typename T> bool VectorWrapper<T>::null() const
+{
+  return nullptr == _vector;
+}
+
+// True when there are no elements (including the null case).
+template <typename T> bool VectorWrapper<T>::empty() const
+{
+  return 0 == size();
+}
+
+#define REGISTER_WRAPPER(T) template class VectorWrapper<T>
+REGISTER_WRAPPER(flatbuffers::Offset<circle::SubGraph>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Buffer>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Tensor>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Operator>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::OperatorCode>);
+REGISTER_WRAPPER(flatbuffers::Offset<circle::Metadata>);
+REGISTER_WRAPPER(int32_t);
+REGISTER_WRAPPER(uint8_t);
+#undef REGISTER_WRAPPER
+
} // namespace luci
diff --git a/compiler/luci/import/src/CircleReader.test.cpp b/compiler/luci/import/src/CircleReader.test.cpp
new file mode 100644
index 000000000..0ce5b600b
--- /dev/null
+++ b/compiler/luci/import/src/CircleReader.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/CircleReader.h"
+
+#include <gtest/gtest.h>
+
+TEST(VectorWrapperTest, basic_pattern)
+{
+ auto fb_builder = flatbuffers::FlatBufferBuilder();
+
+ const std::vector<int32_t> data = {1, 4, 2, 0, 7};
+ auto const vec_offset = fb_builder.CreateVector(data.data(), data.size());
+ auto const vec_pointer = GetTemporaryPointer(fb_builder, vec_offset);
+
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ ASSERT_EQ(wrapper.size(), data.size());
+ ASSERT_TRUE(std::equal(wrapper.begin(), wrapper.end(), data.begin()));
+}
+
+TEST(VectorWrapperTest, wrong_data_NEG)
+{
+ auto fb_builder = flatbuffers::FlatBufferBuilder();
+
+ std::vector<int32_t> data = {1, 4, 2, 0, 7};
+ auto const vec_offset = fb_builder.CreateVector(data.data(), data.size());
+ auto const vec_pointer = GetTemporaryPointer(fb_builder, vec_offset);
+
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ // change data
+ std::reverse(data.begin(), data.end());
+
+ ASSERT_EQ(wrapper.size(), data.size());
+ ASSERT_FALSE(std::equal(wrapper.begin(), wrapper.end(), data.begin()));
+}
+
+TEST(VectorWrapperTest, null_pointer)
+{
+ flatbuffers::Vector<int32_t> *vec_pointer = nullptr;
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ ASSERT_TRUE(wrapper.null());
+ ASSERT_TRUE(wrapper.empty());
+}
+
+TEST(VectorWrapperTest, prohibited_access_NEG)
+{
+ flatbuffers::Vector<uint8_t> *vec_pointer = nullptr;
+ auto const wrapper = luci::wrap(vec_pointer);
+
+ ASSERT_ANY_THROW(wrapper.at(0));
+}
diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp
index 8eae5fcf4..3f7f78591 100644
--- a/compiler/luci/import/src/Importer.cpp
+++ b/compiler/luci/import/src/Importer.cpp
@@ -119,6 +119,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
}
// Create CircleConst nodes for constant tensors.
+ // NOTE Origin is intentionally not provided for constants.
for (uint32_t i = 0; i < tensors.size(); ++i)
{
luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i);
diff --git a/compiler/luci/import/src/Importer.test.cpp b/compiler/luci/import/src/Importer.test.cpp
index 8366546f0..d963b4d49 100644
--- a/compiler/luci/import/src/Importer.test.cpp
+++ b/compiler/luci/import/src/Importer.test.cpp
@@ -16,9 +16,12 @@
#include "luci/Importer.h"
-#include <loco.h>
+#include <luci/IR/CircleNode.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <gtest/gtest.h>
+#include <mio/circle/schema_generated.h>
+#include <flatbuffers/flatbuffers.h>
TEST(TensorFlowLiteImport, Dummy)
{
@@ -26,3 +29,283 @@ TEST(TensorFlowLiteImport, Dummy)
SUCCEED();
}
+
+// helpers for flatbuffers
+namespace
+{
+
+struct BasicCircleModel
+{
+ std::unique_ptr<circle::ModelT> model;
+
+ BasicCircleModel()
+ {
+ model = std::make_unique<circle::ModelT>();
+ model->buffers.push_back(std::make_unique<circle::BufferT>());
+ model->description = "nnpackage";
+ model->version = 0;
+ }
+
+ uint32_t add_subgraph()
+ {
+ model->subgraphs.push_back(std::make_unique<circle::SubGraphT>());
+ model->subgraphs.back()->name = "";
+ model->subgraphs.back()->data_format = circle::DataFormat_CHANNELS_LAST;
+ return model->subgraphs.size() - 1;
+ }
+
+ void add_subgraph_inputs(uint32_t subgraph_id, const std::vector<uint32_t> &inputs)
+ {
+ model->subgraphs[subgraph_id]->inputs.assign(inputs.begin(), inputs.end());
+ }
+
+ void add_subgraph_outputs(uint32_t subgraph_id, const std::vector<uint32_t> &outputs)
+ {
+ model->subgraphs[subgraph_id]->outputs.assign(outputs.begin(), outputs.end());
+ }
+
+ uint32_t add_builtin_opcode(circle::BuiltinOperator opcode)
+ {
+ uint32_t id = model->operator_codes.size();
+ model->operator_codes.push_back(std::make_unique<circle::OperatorCodeT>());
+ model->operator_codes[id]->builtin_code = opcode;
+ model->operator_codes[id]->version = 1;
+ return id;
+ }
+
+ uint32_t add_buffer()
+ {
+ model->buffers.push_back(std::make_unique<circle::BufferT>());
+ return model->buffers.size() - 1;
+ }
+
+ uint32_t add_float_tensor(uint32_t graph_id, const std::vector<int32_t> &shape,
+ uint32_t buffer_id)
+ {
+ auto &graph = model->subgraphs[graph_id];
+ uint32_t idx = graph->tensors.size();
+ graph->tensors.push_back(std::make_unique<circle::TensorT>());
+ graph->tensors[idx]->shape = shape;
+ graph->tensors[idx]->type = circle::TensorType_FLOAT32;
+ graph->tensors[idx]->buffer = buffer_id;
+ graph->tensors[idx]->name = std::to_string(idx);
+ graph->tensors[idx]->quantization.reset(nullptr);
+ graph->tensors[idx]->is_variable = false;
+ graph->tensors[idx]->sparsity.reset(nullptr);
+ (void)graph->tensors[idx]->shape_signature;
+ return idx;
+ }
+
+ uint32_t add_builtin_operator(uint32_t graph_id, uint32_t opcode_id,
+ const std::vector<uint32_t> &inputs,
+ const std::vector<uint32_t> &outputs)
+ {
+ auto &graph = model->subgraphs[graph_id];
+ auto idx = graph->operators.size();
+ graph->operators.push_back(std::make_unique<circle::OperatorT>());
+ graph->operators[idx]->opcode_index = opcode_id;
+ graph->operators[idx]->inputs.assign(inputs.begin(), inputs.end());
+ graph->operators[idx]->outputs.assign(outputs.begin(), outputs.end());
+ graph->operators[idx]->builtin_options.Reset();
+ (void)graph->operators[idx]->custom_options;
+ graph->operators[idx]->custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
+ (void)graph->operators[idx]->mutating_variable_inputs;
+ (void)graph->operators[idx]->intermediates;
+ return idx;
+ }
+
+ uint32_t add_plan_metadata(uint32_t buffer_id)
+ {
+ static_assert(sizeof(uint32_t) == 4, "metadata is stored in blocks of 32 bit unsiged ints");
+ uint32_t idx = model->metadata.size();
+ model->metadata.push_back(std::make_unique<circle::MetadataT>());
+ model->metadata[idx]->name = "ONE_execution_plan_table";
+ model->metadata[idx]->buffer = buffer_id;
+ model->buffers[buffer_id]->data.resize(4);
+ auto &entries_count = *reinterpret_cast<uint32_t *>(model->buffers[buffer_id]->data.data());
+ entries_count = 0;
+ return idx;
+ }
+
+ void add_plan_entry(uint32_t plan_buffer_id, uint32_t execution_order,
+ const std::vector<uint32_t> &offsets)
+ {
+ auto &buffer = model->buffers[plan_buffer_id]->data;
+ auto old_size = buffer.size();
+ assert(old_size % 4 == 0);
+ assert(old_size > 0);
+
+ // Allocate space for new entry:
+ // 4 bytes for entry id
+ // 4 bytes for entry size
+ // 4 bytes for execution order
+ // offsets.size() * 4 bytes for offsets
+ buffer.resize(old_size + 12 + offsets.size() * 4);
+ uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data());
+ *number_of_entries_ptr += 1;
+
+ uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size);
+
+ entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id
+ entry_data_ptr[1] = 1 + offsets.size(); // entry size
+ entry_data_ptr[2] = execution_order; // execution order
+ std::copy(offsets.begin(), offsets.end(), entry_data_ptr + 3);
+ }
+};
+
+// One-operator test model: a single RELU reading a {1, 2, 3, 4} float tensor
+// and writing a {1, 2, 3, 4} float tensor, each tensor backed by its own buffer.
+struct SimpleRELUModel : public BasicCircleModel
+{
+  SimpleRELUModel()
+  {
+    auto relu_opcode_id = add_builtin_opcode(circle::BuiltinOperator_RELU);
+
+    uint32_t subgraph_id = add_subgraph();
+
+    auto input_buffer_id = add_buffer();
+    auto output_buffer_id = add_buffer();
+
+    auto input_tensor_idx = add_float_tensor(subgraph_id, {1, 2, 3, 4}, input_buffer_id);
+    auto output_tensor_idx = add_float_tensor(subgraph_id, {1, 2, 3, 4}, output_buffer_id);
+
+    add_subgraph_inputs(subgraph_id, {input_tensor_idx});
+    add_subgraph_outputs(subgraph_id, {output_tensor_idx});
+
+    // Wire the operator with the indices returned by add_float_tensor instead
+    // of literal 0/1, so the operator stays consistent with the subgraph
+    // inputs/outputs if the tensor layout ever changes.
+    add_builtin_operator(subgraph_id, relu_opcode_id, {input_tensor_idx}, {output_tensor_idx});
+  }
+};
+
+} // namespace
+
+/**
+ * This test checks that a one-op RELU model with an execution plan is successfully imported
+ */
+TEST(TensorFlowLiteImport, simple_plan)
+{
+  // Build the RELU model and attach a plan table with three entries
+  // (execution order, offsets): (1, {100}), (2, {300}), (3, {200}).
+  SimpleRELUModel model;
+  auto metadata_buffer_id = model.add_buffer();
+  model.add_plan_metadata(metadata_buffer_id);
+
+  model.add_plan_entry(metadata_buffer_id, 1, {100});
+  model.add_plan_entry(metadata_buffer_id, 2, {300});
+  model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+  // Serialize the object-API model and import it through the packed API
+  flatbuffers::FlatBufferBuilder fbb;
+  auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+  circle::FinishModelBuffer(fbb, model_offset);
+
+  auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+  luci::Importer import;
+
+  auto luci_module = import.importModule(model_ptr);
+
+  auto main_graph = luci_module->graph();
+  // uint32_t index, matching the node loops elsewhere in luci, so the loop
+  // condition does not mix signed and unsigned operands
+  for (uint32_t i = 0; i < main_graph->nodes()->size(); ++i)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(main_graph->nodes()->at(i));
+    switch (node->opcode())
+    {
+      case luci::CircleOpcode::CIRCLEINPUT:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 1);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 100);
+        break;
+      }
+      case luci::CircleOpcode::CIRCLEOUTPUT:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 3);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 200);
+        break;
+      }
+      case luci::CircleOpcode::RELU:
+      {
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 2);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 300);
+        break;
+      }
+      default:
+        // the model contains only input, RELU and output nodes
+        FAIL();
+    }
+  }
+}
+
+/**
+ * This test checks that a model with an incomplete execution plan is successfully imported
+ */
+TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG)
+{
+  // Build the RELU model with a plan table that holds a single entry only
+  SimpleRELUModel model;
+  auto metadata_buffer_id = model.add_buffer();
+  model.add_plan_metadata(metadata_buffer_id);
+
+  model.add_plan_entry(metadata_buffer_id, 1, {100});
+
+  // Serialize the object-API model and import it through the packed API
+  flatbuffers::FlatBufferBuilder fbb;
+  auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+  circle::FinishModelBuffer(fbb, model_offset);
+
+  auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+  luci::Importer import;
+
+  auto luci_module = import.importModule(model_ptr);
+
+  auto main_graph = luci_module->graph();
+  // uint32_t index, matching the node loops elsewhere in luci, so the loop
+  // condition does not mix signed and unsigned operands
+  for (uint32_t i = 0; i < main_graph->nodes()->size(); ++i)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(main_graph->nodes()->at(i));
+    switch (node->opcode())
+    {
+      case luci::CircleOpcode::CIRCLEINPUT:
+      {
+        // only the input node is expected to carry the single plan entry
+        ASSERT_TRUE(luci::has_execution_plan(node));
+        auto plan = luci::get_execution_plan(node);
+        ASSERT_EQ(plan.order_in_plan(), 1);
+        ASSERT_EQ(plan.offsets().size(), 1);
+        ASSERT_EQ(plan.offsets()[0], 100);
+        break;
+      }
+      case luci::CircleOpcode::CIRCLEOUTPUT:
+      case luci::CircleOpcode::RELU:
+      {
+        ASSERT_FALSE(luci::has_execution_plan(node));
+        break;
+      }
+      default:
+        // the model contains only input, RELU and output nodes
+        FAIL();
+    }
+  }
+}
+
+/**
+ * This test checks that a corrupted execution plan induces an exception
+ */
+TEST(TensorFlowLiteImport, corrupted_plan_NEG)
+{
+ SimpleRELUModel model;
+ auto metadata_buffer_id = model.add_buffer();
+ model.add_plan_metadata(metadata_buffer_id);
+
+ model.add_plan_entry(metadata_buffer_id, 1, {100});
+ model.add_plan_entry(metadata_buffer_id, 2, {300});
+ model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+ // corrupt data
+ *reinterpret_cast<uint32_t *>(model.model->buffers[metadata_buffer_id]->data.data()) = 4;
+
+ flatbuffers::FlatBufferBuilder fbb;
+ auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+ circle::FinishModelBuffer(fbb, model_offset);
+
+ auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+ luci::Importer import;
+
+ ASSERT_ANY_THROW(import.importModule(model_ptr));
+}
diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt
index 433b7cd4e..2f6ee23fa 100644
--- a/compiler/luci/lang/CMakeLists.txt
+++ b/compiler/luci/lang/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_lang ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_lang ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_lang PRIVATE src)
target_include_directories(luci_lang PUBLIC include)
target_link_libraries(luci_lang PUBLIC loco)
diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt
index b64a0651e..bbd733f1e 100644
--- a/compiler/luci/log/CMakeLists.txt
+++ b/compiler/luci/log/CMakeLists.txt
@@ -1,11 +1,11 @@
# TODO Find how to test logging framework
file(GLOB_RECURSE SOURCES "src/*.cpp")
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_log ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_log ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_log PUBLIC include)
target_link_libraries(luci_log PUBLIC hermes)
target_link_libraries(luci_log PRIVATE hermes_std)
diff --git a/compiler/luci/log/include/luci/Log.h b/compiler/luci/log/include/luci/Log.h
index e148810d8..ddb34f47f 100644
--- a/compiler/luci/log/include/luci/Log.h
+++ b/compiler/luci/log/include/luci/Log.h
@@ -48,7 +48,6 @@ public:
private:
bool _show_warn = true;
- bool _show_info = false;
int _show_verbose = 0;
};
@@ -67,8 +66,8 @@ private:
#define LOGGER(name) ::luci::Logger name{::luci::LoggingContext::get()};
// TODO Support FATAL, ERROR
-#define INFO(name) HERMES_INFO(name)
-#define WARN(name) HERMES_WARN(name)
+#define INFO(name) HERMES_VERBOSE(name, 3)
+#define WARN(name) HERMES_VERBOSE(name, 2)
#define VERBOSE(name, lv) HERMES_VERBOSE(name, lv)
// WARNING!
diff --git a/compiler/luci/log/src/Log.cpp b/compiler/luci/log/src/Log.cpp
index c26bf307b..0cc45e8bf 100644
--- a/compiler/luci/log/src/Log.cpp
+++ b/compiler/luci/log/src/Log.cpp
@@ -33,11 +33,6 @@ namespace
*/
template <typename T> T safecast(const char *, const T &);
-template <> bool safecast<bool>(const char *s, const bool &value)
-{
- return (s == nullptr) ? value : (std::stoi(s) != 0);
-}
-
template <> int safecast<int>(const char *s, const int &value)
{
return (s == nullptr) ? value : std::stoi(s);
@@ -68,9 +63,6 @@ LoggerConfig::LoggerConfig()
_show_warn = !settings->get(luci::UserSettings::Key::MuteWarnings);
- // Turn on info logging if LUCI_LOG is set as non-zero value
- _show_info = safecast<bool>(std::getenv("LUCI_LOG"), false);
-
// Turn on verbose logging if LUCI_LOG is set to some level
// VERBOSE(l, 1) will be visible with LUCI_LOG=2 and VERBOSE(l, 2) with LUCI_LOG=3 and so on
_show_verbose = safecast<int>(std::getenv("LUCI_LOG"), 0);
@@ -87,6 +79,8 @@ void LoggerConfig::configure(const hermes::Source *source, hermes::Source::Setti
void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) const
{
+ // TODO remove deprecated codes
+#if 0
setting.filter(hermes::SeverityCategory::FATAL).reject_all();
setting.filter(hermes::SeverityCategory::ERROR).reject_all();
setting.filter(hermes::SeverityCategory::WARN).reject_all();
@@ -106,6 +100,16 @@ void LoggerConfig::configure(const Logger *, hermes::Source::Setting &setting) c
{
setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
}
+#endif
+ setting.reject_all();
+ setting.filter(hermes::SeverityCategory::FATAL).accept_upto(_show_verbose);
+ setting.filter(hermes::SeverityCategory::ERROR).accept_upto(_show_verbose);
+ if (_show_warn)
+ {
+ setting.filter(hermes::SeverityCategory::WARN).accept_upto(_show_verbose);
+ }
+ setting.filter(hermes::SeverityCategory::INFO).accept_upto(_show_verbose);
+ setting.filter(hermes::SeverityCategory::VERBOSE).accept_upto(_show_verbose);
}
} // namespace luci
diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt
index 4d801b046..aed9fb79b 100644
--- a/compiler/luci/logex/CMakeLists.txt
+++ b/compiler/luci/logex/CMakeLists.txt
@@ -1,11 +1,11 @@
# TODO Find how to test logging-ex utility
file(GLOB_RECURSE SOURCES "src/*.cpp")
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_logex ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_logex ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_logex PUBLIC include)
target_link_libraries(luci_logex PUBLIC loco)
target_link_libraries(luci_logex PUBLIC locop)
diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt
index eacbe1ccc..ec8e0b0d6 100644
--- a/compiler/luci/partition/CMakeLists.txt
+++ b/compiler/luci/partition/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_partition ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_partition ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_partition PRIVATE src)
target_include_directories(luci_partition PUBLIC include)
target_link_libraries(luci_partition PUBLIC luci_lang)
diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp
index b767c77ae..c517bf93f 100644
--- a/compiler/luci/partition/src/PartitionMerge.cpp
+++ b/compiler/luci/partition/src/PartitionMerge.cpp
@@ -58,6 +58,9 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
// we need to clone this CircleConst for each graph of the group.
if (dynamic_cast<const luci::CircleConst *>(input) != nullptr)
continue;
+ // Skip also for OutputExclude
+ if (dynamic_cast<const luci::CircleOutputExclude *>(input) != nullptr)
+ continue;
auto input_group = pgroups->group_of(input);
// NOTE: all the nodes should be registered and return should be valid group.
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp
index e0b4e8e0d..0080873e6 100644
--- a/compiler/luci/partition/src/PartitionPGroups.cpp
+++ b/compiler/luci/partition/src/PartitionPGroups.cpp
@@ -35,6 +35,17 @@ class IsVirtualNode final : public luci::CircleNodeVisitor<bool>
public:
bool visit(const luci::CircleInput *) final { return true; }
bool visit(const luci::CircleOutput *) final { return true; }
+ // For multiple outputs
+ bool visit(const luci::CircleCustomOut *) final { return true; }
+ bool visit(const luci::CircleIfOut *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV4Out *) final { return true; }
+ bool visit(const luci::CircleNonMaxSuppressionV5Out *) final { return true; }
+ bool visit(const luci::CircleSplitOut *) final { return true; }
+ bool visit(const luci::CircleSplitVOut *) final { return true; }
+ bool visit(const luci::CircleTopKV2Out *) final { return true; }
+ bool visit(const luci::CircleUniqueOut *) final { return true; }
+ bool visit(const luci::CircleUnpackOut *) final { return true; }
+ bool visit(const luci::CircleWhileOut *) final { return true; }
// TODO add all virtual nodes
// default is false
@@ -58,6 +69,91 @@ bool check_allocate_partition(const luci::CircleNode *node)
return true;
}
+// Visitor answering which partition group a virtual multi-output node
+// (CircleXXXOut) should join: the group recorded for the node it reads from.
+// For any other node it yields an empty string, meaning "nothing to do".
+class FindGroupToFollow final : public luci::CircleNodeVisitor<const std::string &>
+{
+public:
+  FindGroupToFollow(const luci::PartitionTable &partition, luci::PGroups *pgroups)
+    : _partition(partition), _pgroups(pgroups)
+  {
+    // NOTHING TO DO
+  }
+
+private:
+  // Returns the group recorded in node2group for 'input'. When the recorded
+  // name is empty this asserts in debug builds and falls back to the
+  // partition's default group otherwise.
+  const std::string &groupof(const luci::CircleNode *input) const
+  {
+    // Bind by reference so the group name is neither copied nor looked up a
+    // second time just to hand out a reference.
+    const auto &group = _pgroups->node2group[input];
+    assert(not group.empty());
+    if (group.empty())
+      return _partition.default_group;
+    return group;
+  }
+
+public:
+// Each multi-output virtual node follows the group of its single input()
+#define IMPLEMENT(CLASS)                                             \
+  const std::string &visit(const luci::CLASS *node) final            \
+  {                                                                  \
+    auto input = loco::must_cast<luci::CircleNode *>(node->input()); \
+    return groupof(input);                                           \
+  }
+
+  IMPLEMENT(CircleCustomOut);
+  IMPLEMENT(CircleIfOut);
+  IMPLEMENT(CircleNonMaxSuppressionV4Out);
+  IMPLEMENT(CircleNonMaxSuppressionV5Out);
+  IMPLEMENT(CircleSplitOut);
+  IMPLEMENT(CircleSplitVOut);
+  IMPLEMENT(CircleTopKV2Out);
+  IMPLEMENT(CircleUniqueOut);
+  IMPLEMENT(CircleUnpackOut);
+  IMPLEMENT(CircleWhileOut);
+
+#undef IMPLEMENT
+
+  // return empty for nothing to do
+  const std::string &visit(const luci::CircleNode *) final { return _empty_str; }
+
+private:
+  const luci::PartitionTable &_partition;
+  luci::PGroups *_pgroups = nullptr;
+  std::string _empty_str; // backing storage for the "nothing to do" result
+};
+
+} // namespace
+
+namespace
+{
+
+// Wraps 'node' into a new single-node PGroup named 'group' with id 'idx + 1',
+// records the node's arguments as the group's inputs and the node itself as
+// its output, then registers the group in the lookup tables of 'pgroups'.
+void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx)
+{
+  auto new_group = std::make_unique<luci::PGroup>();
+  new_group->group = group;
+  new_group->id = idx + 1;
+
+  {
+    // the only member of the new group is the node itself
+    auto member = std::make_unique<luci::PNode>();
+    member->node = node;
+    member->group = group;
+    member->pgroup = new_group.get();
+    new_group->pnodes.push_back(std::move(member));
+  }
+
+  // Set input of PGroup
+  for (uint32_t in = 0; in < node->arity(); ++in)
+  {
+    // this input may be a CircleInput in the source graph
+    // --> not confident this is safe
+    new_group->inputs.push_back(loco::must_cast<luci::CircleNode *>(node->arg(in)));
+  }
+  // Set output of PGroup: node itself or multiple virtual outputs
+  // TODO support multiple virtual outputs
+  new_group->outputs.push_back(node);
+
+  // register before ownership of the group moves into the pool
+  pgroups->node2group[node] = group;
+  pgroups->id2pgroup[new_group->id] = new_group.get();
+
+  pgroups->pgroups.push_back(std::move(new_group));
+}
+
} // namespace
namespace luci
@@ -120,6 +216,8 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
<< std::endl;
+ append(node, pgroups.get(), group, idx);
+#if 0
auto pgroup = std::make_unique<luci::PGroup>();
pgroup->group = group;
pgroup->id = idx + 1;
@@ -147,6 +245,7 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
pgroups->id2pgroup[pgroup->id] = pgroup.get();
pgroups->pgroups.push_back(std::move(pgroup));
+#endif
}
else
{
@@ -156,6 +255,22 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
}
}
+ // handle for virtual nodes like multiple outputs
+ // these nodes should follow group of the input
+ for (uint32_t idx = 0; idx < nodes->size(); ++idx)
+ {
+ auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
+
+ // virtual nodes like CircleUnpackOut should follow the group of their input (owner)
+ // or just be set to default
+ FindGroupToFollow query(partition, pgroups.get());
+ const auto &group = node->accept(&query);
+ if (not group.empty())
+ {
+ append(node, pgroups.get(), group, idx);
+ }
+ }
+
return std::move(pgroups);
}
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index 2361bb4f5..b8b406a38 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "FlatBuffers NOT FOUND")
return()
@@ -8,11 +8,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_pass ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_pass PRIVATE src)
target_include_directories(luci_pass PUBLIC include)
target_link_libraries(luci_pass PUBLIC loco)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 917cacae9..658563ecf 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -93,6 +93,8 @@ public:
Quantize_tensor_names,
Quantize_scales,
Quantize_zero_points,
+ Quantize_input_type,
+ Quantize_output_type,
// sparsify
Sparsify_tensor_name,
@@ -104,9 +106,6 @@ public:
// convert NCHW to NHWC
NCHW_to_NHWC_input_shape,
NCHW_to_NHWC_output_shape,
-
- Quantize_input_dtype = Quantize_input_model_dtype, // TODO Remove this
- Quantize_output_dtype = Quantize_output_model_dtype, // TODO Remove this
};
virtual ~Options() = default;
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
index d618a07b6..648abad70 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
@@ -31,11 +31,23 @@ namespace luci
*/
class QuantizeWithMinMaxPass : public logo::Pass
{
+ // For backward-compatibility
+ // TODO Remove this constructor
public:
QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
QuantizationGranularity granularity)
- : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
- granularity}
+ : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
+ _granularity{granularity}, _input_type{output_model_dtype}, _output_type{output_model_dtype}
+ {
+ // DO NOTHING
+ }
+
+public:
+ QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
+ QuantizationGranularity granularity, loco::DataType input_type,
+ loco::DataType output_type)
+ : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
+ _granularity{granularity}, _input_type{input_type}, _output_type{output_type}
{
// DO NOTHING
}
@@ -45,9 +57,15 @@ public:
bool run(loco::Graph *graph);
private:
+ void set_input_type(loco::Graph *graph) const;
+ void set_output_type(loco::Graph *graph) const;
+
+private:
loco::DataType _input_model_dtype;
loco::DataType _output_model_dtype;
QuantizationGranularity _granularity;
+ loco::DataType _input_type;
+ loco::DataType _output_type;
};
} // namespace luci
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 5d0c92625..75f04b3b5 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -468,12 +468,20 @@ void CircleOptimizer::quantize(loco::Graph *g) const
static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
+ static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
auto input_model_dtype =
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
auto output_model_dtype =
_options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+ auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
+ if (input_type.empty())
+ input_type = output_model_dtype;
+ auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
+ if (output_type.empty())
+ output_type = output_model_dtype;
if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
throw std::runtime_error("Unsupported input type. List of supported input types: " +
@@ -487,13 +495,21 @@ void CircleOptimizer::quantize(loco::Graph *g) const
throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
to_string(qwmm_supported_granularity));
+ if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_type));
+
+ if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_type));
+
if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
str_to_dtype(output_model_dtype) != loco::DataType::U8)
throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
- luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_model_dtype),
- str_to_dtype(output_model_dtype),
- str_to_granularity(granularity));
+ luci::QuantizeWithMinMaxPass quantizer(
+ str_to_dtype(input_model_dtype), str_to_dtype(output_model_dtype),
+ str_to_granularity(granularity), str_to_dtype(input_type), str_to_dtype(output_type));
quantizer.run(g);
// Post-quantization optimizations
diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
index 66e341518..d83973cd5 100644
--- a/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
+++ b/compiler/luci/pass/src/FuseActivationFunctionPass.cpp
@@ -72,13 +72,6 @@ bool fuse_activation_function(luci::CircleNode *node)
else
return false;
}
- else if (opcode == luci::CircleOpcode::TANH)
- {
- if (fused_act == luci::FusedActFunc::NONE)
- target_func = luci::FusedActFunc::TANH;
- else
- return false;
- }
else
return false;
@@ -98,8 +91,9 @@ bool FuseActivationFunctionPass::run(loco::Graph *g)
{
auto circle_node = static_cast<luci::CircleNode *>(node);
auto opcode = circle_node->opcode();
+ // TANH is not supported as CONV fused with TANH is not supported in luci-interpreter
if (opcode == luci::CircleOpcode::RELU || opcode == luci::CircleOpcode::RELU6 ||
- opcode == luci::CircleOpcode::RELU_N1_TO_1 || opcode == luci::CircleOpcode::TANH)
+ opcode == luci::CircleOpcode::RELU_N1_TO_1)
{
if (fuse_activation_function(circle_node))
changed = true;
diff --git a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
index 56b414143..9e0a80df1 100644
--- a/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
+++ b/compiler/luci/pass/src/FuseActivationFunctionPass.test.cpp
@@ -86,6 +86,47 @@ protected:
luci::CircleConst *_conv2_b = nullptr;
};
+// Test graphlet that owns the nodes of a Conv2D / Tanh / Conv2D chain.
+// init() only creates and names the nodes; wiring them together is left to the
+// graph class that derives from this graphlet.
+class ConvTanhConvGraphlet
+{
+public:
+ ConvTanhConvGraphlet() = default;
+
+ // Create the two Conv2D nodes, the Tanh node and the four filter/bias constants in g
+ void init(loco::Graph *g)
+ {
+ _conv1 = g->nodes()->create<luci::CircleConv2D>();
+ _conv2 = g->nodes()->create<luci::CircleConv2D>();
+ _tanh = g->nodes()->create<luci::CircleTanh>();
+ _conv1_f = g->nodes()->create<luci::CircleConst>();
+ _conv1_b = g->nodes()->create<luci::CircleConst>();
+ _conv2_f = g->nodes()->create<luci::CircleConst>();
+ _conv2_b = g->nodes()->create<luci::CircleConst>();
+
+ // Only conv1 gets an explicit fused activation here; conv2 is left as created
+ _conv1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ _conv1->name("conv1");
+ _conv2->name("conv2");
+ _tanh->name("tanh");
+ _conv1_f->name("conv1f");
+ _conv1_b->name("conv1b");
+ _conv2_f->name("conv2f");
+ _conv2_b->name("conv2b");
+ }
+
+public:
+ // Accessors for the nodes a test may want to inspect
+ luci::CircleTanh *tanh() { return _tanh; }
+ luci::CircleConv2D *conv1() { return _conv1; }
+ luci::CircleConv2D *conv2() { return _conv2; }
+
+protected:
+ // All pointers stay nullptr until init() is called
+ luci::CircleConv2D *_conv1 = nullptr;
+ luci::CircleConv2D *_conv2 = nullptr;
+ luci::CircleTanh *_tanh = nullptr;
+ luci::CircleConst *_conv1_f = nullptr;
+ luci::CircleConst *_conv1_b = nullptr;
+ luci::CircleConst *_conv2_f = nullptr;
+ luci::CircleConst *_conv2_b = nullptr;
+};
+
class FuseActTestGraph : public TestIOGraph, public ConvReluConvGraphlet
{
public:
@@ -110,6 +151,30 @@ public:
}
};
+// Test graph wired as: input -> conv1 -> tanh -> conv2 -> output
+class FuseTanhActTestGraph : public TestIOGraph, public ConvTanhConvGraphlet
+{
+public:
+ FuseTanhActTestGraph() = default;
+
+ // Build the graph I/O and the graphlet nodes, then connect them in sequence
+ void init(void)
+ {
+ // NOTE(review): {1}, {1} are presumably the input and output shapes - confirm against TestIOGraph
+ TestIOGraph::init({1}, {1});
+ ConvTanhConvGraphlet::init(g());
+
+ _conv1->input(input());
+ _conv1->filter(_conv1_f);
+ _conv1->bias(_conv1_b);
+
+ // Tanh sits directly after conv1, which is the pattern the fuse pass would look at
+ _tanh->x(_conv1);
+
+ _conv2->input(_tanh);
+ _conv2->filter(_conv2_f);
+ _conv2->bias(_conv2_b);
+
+ output()->from(_conv2);
+ }
+};
+
class ConvHasMultiSuccGraph : public TestIOGraph, public ConvReluConvGraphlet
{
public:
@@ -190,3 +255,15 @@ TEST(FusePreActivationBatchNorm, fuse_activation_function_tanh_NEG)
// Relu input Conv2D already has activation function
EXPECT_FALSE(pass.run(g.g()));
}
+
+// Negative test: running the pass on a Conv2D -> Tanh -> Conv2D graph must report no change
+TEST(FusePreActivationBatchNorm, fuse_tanh_NEG)
+{
+ FuseTanhActTestGraph g;
+ luci::FuseActivationFunctionPass pass;
+
+ g.init();
+
+ // Tanh should not be fused
+ // This can be changed when CONV+TANH is supported by luci-interpreter
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index be81732f8..c3552ec52 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -20,6 +20,7 @@
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Log.h>
#include <oops/UserExn.h>
@@ -63,6 +64,52 @@ void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc
}
}
+// Create a Quantize Op whose
+// dtype is out_type
+// shape is the same with node
+// qparam is computed using node's min/max
+//
+// The new node is created in node's graph and named "<node name>_Quantize".
+// Its input is NOT connected here; the caller is expected to do that.
+// Preconditions (checked by assert only, so not enforced when NDEBUG is defined):
+// node has a quantparam with exactly one min and one max value.
+luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
+{
+ auto quantize = node->graph()->nodes()->create<CircleQuantize>();
+ quantize->name(node->name() + "_Quantize");
+ quantize->dtype(out_type);
+ quantize->rank(node->rank());
+ // NOTE(review): each dimension is copied through value()/set(); presumably an unknown
+ // dimension of node would become a known one here - confirm against loco::Dimension
+ for (uint32_t i = 0; i < node->rank(); i++)
+ quantize->dim(i).set(node->dim(i).value());
+
+ quantize->shape_status(luci::ShapeStatus::VALID);
+
+ auto qparam = node->quantparam();
+ assert(qparam); // FIX_CALLER_UNLESS
+ assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS
+ assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS
+ auto min = qparam->min[0];
+ auto max = qparam->max[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ // nudged_min/nudged_max are only passed as out-params below; they are not used afterwards
+ float nudged_min{0};
+ float nudged_max{0};
+
+ // U8 uses the asymmetric helper; any other type is expected to be S16 and uses the symmetric one
+ if (out_type == loco::DataType::U8)
+ {
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ }
+ else
+ {
+ assert(out_type == loco::DataType::S16);
+ compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ }
+
+ // The Quantize Op gets a fresh quantparam holding only scale and zero point (no min/max)
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+
+ quantize->quantparam(std::move(quantparam));
+
+ return quantize;
+}
+
+
} // namespace
namespace luci
@@ -743,8 +790,6 @@ struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
scaling_factor = scaling_factor < 1 ? 1.0f : std::round(scaling_factor);
}
- circle_node->quantparam()->min.clear();
- circle_node->quantparam()->max.clear();
circle_node->quantparam()->scale.push_back(scaling_factor);
circle_node->quantparam()->zerop.push_back(zp);
}
@@ -1467,6 +1512,97 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant
quant_input(&CirclePadV2::constant_values, 2);
}
+// Make the quantized graph inputs use `_input_type`.
+//
+// For every graph input whose dtype differs from `_input_type`:
+//  - a Quantize Op producing the input's current dtype is inserted right after the input,
+//    so all former consumers of the input keep receiving the dtype they were quantized for
+//  - the input node is requantized to `_input_type` from its recorded min/max
+//  - the graph-level input dtype is updated to `_input_type`
+//
+// Inputs that are BOOL, or that carry no quantization parameters, are left untouched.
+void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
+{
+  auto inputs = g->inputs();
+  for (auto node : loco::input_nodes(g))
+  {
+    auto input = loco::must_cast<luci::CircleInput *>(node);
+    if (input->dtype() == _input_type)
+      continue;
+
+    // Bool type is not quantizable
+    if (input->dtype() == loco::DataType::BOOL)
+      continue;
+
+    // Input without qparam was not quantized, so there is nothing to requantize.
+    // This mirrors the guard in set_output_type(); without it create_quantize_op()
+    // would dereference a null qparam once asserts are compiled out.
+    if (not input->quantparam())
+      continue;
+
+    // Insert Quantize Op
+    auto quant_op = create_quantize_op(input, input->dtype());
+    loco::replace(input).with(quant_op);
+    quant_op->input(input);
+
+    // TODO Set a proper origin (Quantize should have its own Origin)
+    {
+      auto succs = loco::succs(quant_op);
+      assert(succs.size() > 0);
+      auto succ = loco::must_cast<luci::CircleNode *>(*succs.begin());
+      luci::add_origin(quant_op, luci::get_origin(succ));
+    }
+
+    // Requantize input
+    {
+      auto quantparam = input->quantparam();
+      assert(quantparam);
+      assert(quantparam->min.size() == 1); // only support layer-wise quant
+      assert(quantparam->max.size() == 1); // only support layer-wise quant
+      auto min = quantparam->min[0];
+      auto max = quantparam->max[0];
+
+      float scaling_factor{0};
+      int64_t zp{0};
+      // nudged_min/nudged_max are required out-params of the helpers; not used afterwards
+      float nudged_min{0};
+      float nudged_max{0};
+
+      if (_input_type == loco::DataType::U8)
+      {
+        compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+      }
+      else
+      {
+        assert(_input_type == loco::DataType::S16);
+        compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+      }
+      input->dtype(_input_type);
+      input->quantparam()->scale[0] = scaling_factor;
+      input->quantparam()->zerop[0] = zp;
+    }
+
+    auto graph_input = inputs->at(input->index());
+    graph_input->dtype(_input_type);
+  }
+}
+
+
+// Make the quantized graph outputs use `_output_type`.
+//
+// For every graph output whose dtype differs from `_output_type`, a Quantize Op producing
+// `_output_type` is inserted between the output and the node feeding it, and the dtype of
+// both the output node and the graph-level output is updated.
+//
+// Outputs that are BOOL, or whose producer carries no quantization parameters
+// (ex: ArgMax), are left untouched.
+void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
+{
+  auto outputs = g->outputs();
+  for (auto node : loco::output_nodes(g))
+  {
+    auto output = loco::must_cast<luci::CircleOutput *>(node);
+    if (output->dtype() == _output_type)
+      continue;
+
+    // Bool type is not quantizable
+    if (output->dtype() == loco::DataType::BOOL)
+      continue;
+
+    auto from = loco::must_cast<luci::CircleNode *>(output->from());
+
+    // The last Op is not quantizable Op (ex: ArgMax)
+    if (not from->quantparam())
+      continue;
+
+    // Insert Quantize Op between `from` and this output only.
+    // loco::replace(from).with(quant_op) would also redirect every other consumer of
+    // `from` (another Op or another graph output) to the requantized tensor.
+    auto quant_op = create_quantize_op(from, _output_type);
+    quant_op->input(from);
+    output->from(quant_op);
+
+    // TODO Set a proper origin (Quantize should have its own Origin)
+    luci::add_origin(quant_op, luci::get_origin(from));
+
+    // Keep the output node consistent with the tensor it now receives
+    output->dtype(_output_type);
+
+    auto graph_output = outputs->at(output->index());
+    graph_output->dtype(_output_type);
+  }
+}
+
+
bool QuantizeWithMinMaxPass::run(loco::Graph *g)
{
LOGGER(l);
@@ -1539,6 +1675,23 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
}
}
+ // Set input type
+ set_input_type(g);
+
+ // Set output type
+ set_output_type(g);
+
+ // Remove min/max values
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ if (auto qparam = circle_node->quantparam())
+ {
+ qparam->min.clear();
+ qparam->max.clear();
+ }
+ }
+
INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
return false; // one time run
}
diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt
index 9ca6dcb41..d4c8f6391 100644
--- a/compiler/luci/plan/CMakeLists.txt
+++ b/compiler/luci/plan/CMakeLists.txt
@@ -1,10 +1,12 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_plan ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_plan ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_plan PRIVATE src)
target_include_directories(luci_plan PUBLIC include)
target_link_libraries(luci_plan PUBLIC loco)
@@ -13,3 +15,12 @@ target_link_libraries(luci_plan PUBLIC luci_lang)
install(TARGETS luci_plan DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_plan_test ${TESTS})
+target_link_libraries(luci_plan_test luci_plan)
diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
new file mode 100644
index 000000000..d7ccf255f
--- /dev/null
+++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Plan/CircleNodeExecutionPlan.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+// Values given to the constructor are readable through the getters, and the setters replace them
+TEST(CircleNodeExecutionPlan, basic_fields)
+{
+ luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7});
+
+ ASSERT_EQ(plan.order_in_plan(), 123);
+ ASSERT_THAT(plan.offsets(), testing::ElementsAre(4, 5, 6, 7));
+
+ // Overwrite both fields through the setters and read them back
+ plan.order_in_plan(321);
+ plan.offsets({1, 2, 3, 4});
+
+ ASSERT_EQ(plan.order_in_plan(), 321);
+ ASSERT_THAT(plan.offsets(), testing::ElementsAre(1, 2, 3, 4));
+}
+
+
+// A plan attached to a node with add_execution_plan() can be read back with get_execution_plan()
+TEST(CircleNodeExecutionPlan, add_extract_plan)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ // A freshly created node has no plan
+ ASSERT_FALSE(luci::has_execution_plan(add));
+
+ luci::CircleNodeExecutionPlan plan(123, {4, 5, 6, 7});
+ luci::add_execution_plan(add, plan);
+
+ ASSERT_TRUE(luci::has_execution_plan(add));
+
+ auto extracted_plan = luci::get_execution_plan(add);
+
+ ASSERT_EQ(extracted_plan.order_in_plan(), 123);
+ ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(4, 5, 6, 7));
+}
+
+
+// Negative test: reading a plan from a node that has none is expected to throw
+TEST(CircleNodeExecutionPlan, extract_plan_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(luci::has_execution_plan(add));
+
+ ASSERT_ANY_THROW(luci::get_execution_plan(add));
+}
+
+
+// Adding a second plan to the same node is expected to replace the first one.
+// NOTE(review): despite the _NEG suffix, this test asserts no failure - confirm the naming is intended
+TEST(CircleNodeExecutionPlan, double_set_plan_NEG)
+{
+ auto g = loco::make_graph();
+ auto add = g->nodes()->create<luci::CircleAdd>();
+
+ ASSERT_FALSE(luci::has_execution_plan(add));
+
+ luci::CircleNodeExecutionPlan plan1(123, {4, 5, 6, 7});
+ luci::add_execution_plan(add, plan1);
+ ASSERT_TRUE(luci::has_execution_plan(add));
+
+ // Second add on the same node
+ luci::CircleNodeExecutionPlan plan2(321, {1, 2, 3, 4});
+ luci::add_execution_plan(add, plan2);
+ ASSERT_TRUE(luci::has_execution_plan(add));
+
+ // The extracted plan must carry the values of plan2
+ auto extracted_plan = luci::get_execution_plan(add);
+ ASSERT_EQ(extracted_plan.order_in_plan(), 321);
+ ASSERT_THAT(extracted_plan.offsets(), testing::ElementsAre(1, 2, 3, 4));
+}
diff --git a/compiler/luci/profile/CMakeLists.txt b/compiler/luci/profile/CMakeLists.txt
index ae604ab90..f8a0cc005 100644
--- a/compiler/luci/profile/CMakeLists.txt
+++ b/compiler/luci/profile/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_profile ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_profile ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_profile PRIVATE src)
target_include_directories(luci_profile PUBLIC include)
target_link_libraries(luci_profile PUBLIC loco)
diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt
index f48210b9c..0e6097f96 100644
--- a/compiler/luci/service/CMakeLists.txt
+++ b/compiler/luci/service/CMakeLists.txt
@@ -2,11 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-if (NOT LIBRARY_TYPE)
- set(LIBRARY_TYPE "SHARED")
-endif(NOT LIBRARY_TYPE)
+if (NOT LUCI_LIBRARY_TYPE)
+ set(LUCI_LIBRARY_TYPE "SHARED")
+endif(NOT LUCI_LIBRARY_TYPE)
-add_library(luci_service ${LIBRARY_TYPE} ${SOURCES})
+add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_service PRIVATE src)
target_include_directories(luci_service PUBLIC include)
target_link_libraries(luci_service PUBLIC luci_lang)
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build
index 79f32302a..90dfa77b8 100644
--- a/compiler/one-cmds/one-build
+++ b/compiler/one-cmds/one-build
@@ -38,6 +38,16 @@ def _get_parser():
_utils._add_default_arg(parser)
+ opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = ['-' + s for s in opt_name_list]
+ if not opt_name_list:
+ opt_help_message = '(No available optimization options)'
+ else:
+ opt_help_message = '(Available optimization options: ' + ', '.join(
+ opt_name_list) + ')'
+ opt_help_message = 'optimization name to use ' + opt_help_message
+ parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message)
+
return parser
@@ -55,6 +65,14 @@ def _verify_arg(parser, args):
# check if required arguments is given
if not _utils._is_valid_attr(args, 'config'):
parser.error('-C/--config argument is required')
+ # check if given optimization option exists
+ opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
+ if _utils._is_valid_attr(args, 'O'):
+ if ' ' in getattr(args, 'O'):
+ parser.error('Not allowed to have space in the optimization name')
+ if not getattr(args, 'O') in opt_name_list:
+ parser.error('Invalid optimization option')
def _get_driver_name(driver_name):
@@ -101,6 +119,27 @@ def _verify_cfg(driver_list, config):
raise AssertionError('Only one import-* driver can be executed')
+# verify given optimization option file
+def _verify_opt(args):
+ if _utils._is_valid_attr(args, 'O'):
+ config = configparser.ConfigParser()
+ config.optionxform = str
+ opt_name_path_dic = dict(
+ zip(_utils._get_optimization_list(get_name=True),
+ _utils._get_optimization_list()))
+ parsed = config.read(opt_name_path_dic['O' + getattr(args, 'O')])
+ # check if given optimization option file exists
+ if not parsed:
+ raise FileNotFoundError('Not found given optimization configuration file')
+ # check if given optimization option file only has `one-optimize` section
+ if len(config.sections()) == 1 and config.sections()[0] == 'one-optimize':
+ pass
+ else:
+ raise AssertionError(
+ 'Optimization configuration file only allowed to have a \'one-optimize\' section'
+ )
+
+
def main():
# parse arguments
# since the configuration file path is required first,
@@ -121,6 +160,9 @@ def main():
]
_verify_cfg(drivers, config)
+ # verify optimization option file
+ _verify_opt(args)
+
# get sections to run
section_to_run = []
for d in drivers:
@@ -132,6 +174,8 @@ def main():
for section in section_to_run:
driver_path = os.path.join(dir_path, _get_driver_name(section))
cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
+ if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'):
+ cmd += ['-O', getattr(args, 'O')]
_utils._run(cmd)
diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize
index 6ce973c37..a64abff19 100644
--- a/compiler/one-cmds/one-optimize
+++ b/compiler/one-cmds/one-optimize
@@ -64,6 +64,9 @@ def _get_parser():
# opt = (option_name, help_message)
circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
+ # optimization option from one-build
+ parser.add_argument('-O', type=str, help=argparse.SUPPRESS)
+
return parser
@@ -113,6 +116,15 @@ def _optimize(args):
_utils._run(circle2circle_cmd, err_prefix="circle2circle", logfile=f)
+def _parse_opt(args):
+ if _utils._is_valid_attr(args, 'O'):
+ opt_name_path_dic = dict(
+ zip(_utils._get_optimization_list(get_name=True),
+ _utils._get_optimization_list()))
+ config_path = opt_name_path_dic['O' + getattr(args, 'O')]
+ _utils._parse_cfg_and_overwrite(config_path, 'one-optimize', args)
+
+
def main():
# parse arguments
parser = _get_parser()
@@ -121,6 +133,11 @@ def main():
# parse configuration file
_utils._parse_cfg(args, 'one-optimize')
+ # parse optimization file
+ # NOTE if there is a `one-optimize` section in above configuration file as well,
+ # it will be overwritten
+ _parse_opt(args)
+
# verify arguments
_verify_arg(parser, args)
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index cd623a6f8..22d4ddb0e 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -39,13 +39,13 @@ def _get_parser():
# input and output path.
parser.add_argument(
- '-i', '--input_path', type=str, help='full filepath of the input file')
+ '-i', '--input_path', type=str, help='full filepath of the input circle model')
parser.add_argument(
'-d',
'--input_data',
type=str,
help=
- 'full filepath of the input data file. if not specified, run with random input data.'
+ 'full filepath of the input data used for post-training quantization. if not specified, run with random input data.'
)
parser.add_argument(
'-f',
@@ -55,7 +55,10 @@ def _get_parser():
'file format of input data. h5/hdf5 (default), list/filelist (a text file where a file path of input data is written in each line), or dir/directory (a directory where input data are saved)'
)
parser.add_argument(
- '-o', '--output_path', type=str, help='full filepath of the output file')
+ '-o',
+ '--output_path',
+ type=str,
+ help='full filepath of the output quantized model')
# argument for profiling
parser.add_argument(
@@ -70,41 +73,77 @@ def _get_parser():
quantization_group.add_argument(
'--input_dtype',
type=str,
- help='input data type (supported: float32, default=float32)')
+ help=
+ 'input model data type (supported: float32, default=float32). Deprecated (Use input_model_dtype)'
+ )
+ quantization_group.add_argument(
+ '--input_model_dtype',
+ type=str,
+ help='input model data type (supported: float32, default=float32)')
quantization_group.add_argument(
'--quantized_dtype',
type=str,
- help='output quantized data type (supported: uint8, int16, default=uint8)')
+ help='data type of output quantized model (supported: uint8, int16, default=uint8)'
+ )
quantization_group.add_argument(
'--granularity',
type=str,
- help='quantize granularity (supported: layer, channel, default=layer)')
+ help='quantization granularity (supported: layer, channel, default=layer)')
+ quantization_group.add_argument(
+ '--input_type',
+ type=str,
+ help=
+ 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+ )
+ quantization_group.add_argument(
+ '--output_type',
+ type=str,
+ help=
+ 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+ )
quantization_group.add_argument(
- '--min_percentile', type=str, help='minimum percentile (0.0~100.0, default=1.0)')
+ '--min_percentile',
+ type=str,
+ help=
+ 'minimum percentile (0.0~100.0, default=1.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
+ )
quantization_group.add_argument(
- '--max_percentile', type=str, help='maximum percentile (0.0~100.0, default=99.0)')
+ '--max_percentile',
+ type=str,
+ help=
+ 'maximum percentile (0.0~100.0, default=99.0). Algorithm parameter for calibration. This is valid when calibration algorithm is percentile.'
+ )
quantization_group.add_argument(
'--mode',
type=str,
- help='record mode (supported: percentile/moving_average, default=percentile)')
+ help=
+ "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max."
+ )
- # arguments for force_quantparam
- parser.add_argument(
+ # arguments for force_quantparam option
+ force_quantparam_group = parser.add_argument_group(
+ 'arguments for force_quantparam option')
+
+ force_quantparam_group.add_argument(
'--force_quantparam',
action='store_true',
- help='write quantparam to the specified tensor')
- parser.add_argument(
+ help=
+ 'overwrite quantparam (scale, zero_point) to the specified tensor in the quantized model.'
+ )
+ force_quantparam_group.add_argument(
'--tensor_name', type=str, action='append', help='tensor name (string)')
- parser.add_argument('--scale', type=float, action='append', help='scale (float)')
- parser.add_argument(
+ force_quantparam_group.add_argument(
+ '--scale', type=float, action='append', help='scale (float)')
+ force_quantparam_group.add_argument(
'--zero_point', type=int, action='append', help='zero point (int)')
return parser
def _set_default_values(args):
- if not _utils._is_valid_attr(args, 'input_dtype'):
- setattr(args, 'input_dtype', 'float32')
+ if not _utils._is_valid_attr(args, 'input_model_dtype') and not _utils._is_valid_attr(
+ args, 'input_dtype'):
+ setattr(args, 'input_model_dtype', 'float32')
if not _utils._is_valid_attr(args, 'quantized_dtype'):
setattr(args, 'quantized_dtype', 'uint8')
if not _utils._is_valid_attr(args, 'granularity'):
@@ -174,7 +213,10 @@ def _quantize(args):
circle_quantizer_cmd.append('--verbose')
# quantize_dequantize_weights
circle_quantizer_cmd.append('--quantize_dequantize_weights')
- if _utils._is_valid_attr(args, 'input_dtype'):
+ # Use input_model_dtype if it exists. Use input_dtype otherwise.
+ if _utils._is_valid_attr(args, 'input_model_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+ elif _utils._is_valid_attr(args, 'input_dtype'):
circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
if _utils._is_valid_attr(args, 'quantized_dtype'):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
@@ -243,12 +285,21 @@ def _quantize(args):
circle_quantizer_cmd.append('--verbose')
# quantize_with_minmax
circle_quantizer_cmd.append('--quantize_with_minmax')
- if _utils._is_valid_attr(args, 'input_dtype'):
+ # Use input_model_dtype if it exists. Use input_dtype otherwise.
+ if _utils._is_valid_attr(args, 'input_model_dtype'):
+ circle_quantizer_cmd.append(getattr(args, 'input_model_dtype'))
+ elif _utils._is_valid_attr(args, 'input_dtype'):
circle_quantizer_cmd.append(getattr(args, 'input_dtype'))
if _utils._is_valid_attr(args, 'quantized_dtype'):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
if _utils._is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if _utils._is_valid_attr(args, 'input_type'):
+ circle_quantizer_cmd.append('--input_type')
+ circle_quantizer_cmd.append(getattr(args, 'input_type'))
+ if _utils._is_valid_attr(args, 'output_type'):
+ circle_quantizer_cmd.append('--output_type')
+ circle_quantizer_cmd.append(getattr(args, 'output_type'))
# input and output path
circle_quantizer_cmd.append(tmp_output_path_2)
if _utils._is_valid_attr(args, 'output_path'):
diff --git a/compiler/one-cmds/tests/OONE-BUILD_014.cfg b/compiler/one-cmds/tests/OONE-BUILD_014.cfg
new file mode 100644
index 000000000..a39aae071
--- /dev/null
+++ b/compiler/one-cmds/tests/OONE-BUILD_014.cfg
@@ -0,0 +1,2 @@
+[one-optimize]
+make_batchnorm_gamma_positive=True
diff --git a/compiler/one-cmds/tests/one-build_014.cfg b/compiler/one-cmds/tests/one-build_014.cfg
new file mode 100644
index 000000000..f09145ec3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_014.cfg
@@ -0,0 +1,22 @@
+[one-build]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+make_batchnorm_gamma_positive=False
diff --git a/compiler/one-cmds/tests/one-build_014.test b/compiler/one-cmds/tests/one-build_014.test
new file mode 100644
index 000000000..b3acbf56b
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_014.test
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use `OONE-BUILD_014` optimization option
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# Remove the optimization file this test copied in, and the ../optimization
+# directory itself when this test was the one that created it.
+clean_envir()
+{
+ rm -rf ../optimization/OONE-BUILD_014.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+}
+
+# Failure path: report FAILED, clean up the test fixtures and exit with 255.
+# Installed as the ERR trap and also called explicitly at the end of the script.
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ clean_envir
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-build_014.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+# Create ../optimization when missing and remember that, so clean_envir can remove it again
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+# Install the optimization file that the -O option below refers to
+cp OONE-BUILD_014.cfg ../optimization
+
+# run test
+LUCI_LOG=5 one-build -C ${configfile} -OONE-BUILD_014 > ${filename}.log 2>&1
+
+clean_envir
+
+# one-build_014.cfg sets make_batchnorm_gamma_positive=False while OONE-BUILD_014.cfg
+# sets it to True, so seeing the pass name in the log means the -O file took effect
+if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-build_neg_007.test b/compiler/one-cmds/tests/one-build_neg_007.test
new file mode 100644
index 000000000..5c5d9af7a
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_007.test
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Valid optimization option but invalid configuration file path
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR trap handler: clean up the fixtures created by this test, then report
+# SUCCESS only when the log contains the expected error message.
+trap_err_onexit()
+{
+ rm -rf ../optimization/OONE_BUILD_NEG_007.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+ if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# Create ../optimization when missing and remember that, so it can be removed again
+if [ ! -d "../optimization" ]; then
+  mkdir -p ../optimization
+  OPT_ALREADY_EXIST=false
+fi
+
+
+# Empty optimization file: makes -OONE_BUILD_NEG_007 a valid optimization option
+touch ../optimization/OONE_BUILD_NEG_007.cfg
+
+# Not a configuration file, so one-build is expected to fail
+configfile=".."
+
+# run test
+one-build -C ${configfile} -OONE_BUILD_NEG_007 > ${filename}.log 2>&1
+
+# Reaching this point means one-build unexpectedly succeeded and the ERR trap did not run.
+# Clean up here as well, otherwise the fixture file (and directory) would be left behind.
+rm -rf ../optimization/OONE_BUILD_NEG_007.cfg
+if [ "$OPT_ALREADY_EXIST" = false ]; then
+  rm -rf ../optimization
+fi
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_008.test b/compiler/one-cmds/tests/one-build_neg_008.test
new file mode 100644
index 000000000..8ed287150
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_008.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Invalid optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR handler: one-build is expected to fail, so the outcome is decided by
+# whether the expected diagnostic was written to the log.
+trap_err_onexit()
+{
+  if ! grep -q "Invalid optimization option" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+# run test
+one-build -C ${configfile} -OONE_BUILD_NEG_008 > ${filename}.log 2>&1
+
+# reaching this point means one-build did not fail
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-build_neg_009.test b/compiler/one-cmds/tests/one-build_neg_009.test
new file mode 100644
index 000000000..8d9c8318e
--- /dev/null
+++ b/compiler/one-cmds/tests/one-build_neg_009.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have space in the optimization name
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR handler: one-build is expected to fail, so the outcome is decided by
+# whether the expected diagnostic was written to the log.
+trap_err_onexit()
+{
+  if ! grep -q "Not allowed to have space in the optimization name" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+# run test
+one-build -C ${configfile} "-O SPACE OPTION" > ${filename}.log 2>&1
+
+# reaching this point means one-build did not fail
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_007.test b/compiler/one-cmds/tests/one-quantize_007.test
new file mode 100644
index 000000000..34ae92df4
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_007.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# Report the failure and abort; installed as the ERR handler and also
+# called directly by the checks below.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.q16.iq8.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+  if ! /bin/bash one-import_001.test > /dev/null 2>&1; then
+    trap_err_onexit
+  fi
+fi
+
+# run test without input data
+one-quantize \
+  --input_dtype float32 \
+  --quantized_dtype int16 \
+  --granularity channel \
+  --input_type uint8 \
+  --input_path ${inputfile} \
+  --output_path ${outputfile} > /dev/null 2>&1
+
+# the quantized model must exist and be non-empty
+[[ -s "${outputfile}" ]] || trap_err_onexit
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_008.test b/compiler/one-cmds/tests/one-quantize_008.test
new file mode 100644
index 000000000..aff6bcf39
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_008.test
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# Report the failure and abort; installed as the ERR handler and also
+# called directly by the checks below.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.random.quantized.q16.oq8.circle"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+  if ! /bin/bash one-import_001.test > /dev/null 2>&1; then
+    trap_err_onexit
+  fi
+fi
+
+# run test without input data
+one-quantize \
+  --input_dtype float32 \
+  --quantized_dtype int16 \
+  --granularity channel \
+  --output_type uint8 \
+  --input_path ${inputfile} \
+  --output_path ${outputfile} > /dev/null 2>&1
+
+# the quantized model must exist and be non-empty
+[[ -s "${outputfile}" ]] || trap_err_onexit
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test
new file mode 100644
index 000000000..ac920a4fe
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_019.test
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with unsupported input type
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# ERR handler: one-quantize is expected to fail, so the outcome is decided
+# by whether the expected diagnostic was written to the log.
+trap_err_onexit()
+{
+  if ! grep -q "Unsupported input type" "${filename}.log"; then
+    echo "${filename_ext} FAILED"
+    exit 255
+  fi
+
+  echo "${filename_ext} SUCCESS"
+  exit 0
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.neg_019.circle"
+
+rm -rf ${outputfile}.log
+
+# run test
+one-quantize \
+  --input_dtype float32 \
+  --quantized_dtype int16 \
+  --granularity channel \
+  --input_type float32 \
+  --input_path ${inputfile} \
+  --output_path ${outputfile} > ${filename}.log 2>&1
+
+# reaching this point means one-quantize did not fail
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_023.cfg b/compiler/one-cmds/tests/onecc_023.cfg
new file mode 100644
index 000000000..edbcc6f78
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_023.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_023.q16.iq8.circle
+quantized_dtype=int16
+granularity=channel
+input_type=uint8
diff --git a/compiler/one-cmds/tests/onecc_023.test b/compiler/one-cmds/tests/onecc_023.test
new file mode 100644
index 000000000..50b3b1c6f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_023.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize only (every other driver is disabled in onecc_023.cfg)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+# Report the failure and abort; installed as the ERR handler and also
+# called directly by the output check below.
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_023.cfg"
+outputfile="inception_v3.onecc_023.q16.iq8.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > /dev/null 2>&1
+
+# the output model must exist and be non-empty
+[[ -s "${outputfile}" ]] || trap_err_onexit
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
index efb01a210..5d84c2bd5 100644
--- a/compiler/one-cmds/utils.py
+++ b/compiler/one-cmds/utils.py
@@ -16,6 +16,8 @@
import argparse
import configparser
+import glob
+import ntpath
import os
import subprocess
import sys
@@ -124,9 +126,34 @@ def _is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
+def _parse_cfg_and_overwrite(config_path, section, args):
+    """
+    parse given section of configuration file and set the values of args.
+    Even if the values parsed from the configuration file already exist in args,
+    the values are overwritten.
+
+    config_path: path of the configuration file. Nothing is done when it is None.
+    section: name of the section to read.
+    args: object whose attributes are set, one per option of the section.
+
+    Raises FileNotFoundError when the configuration file cannot be read and
+    AssertionError when the file has no such section.
+    """
+    if config_path is None:
+        # DO NOTHING
+        return
+    config = configparser.ConfigParser()
+    # make option names case sensitive
+    config.optionxform = str
+    # ConfigParser.read() returns the list of files it managed to parse,
+    # so an empty result means the given path could not be read
+    parsed = config.read(config_path)
+    if not parsed:
+        raise FileNotFoundError('Not found given configuration file')
+    if not config.has_section(section):
+        raise AssertionError('configuration file doesn\'t have \'' + section +
+                             '\' section')
+    for key in config[section]:
+        setattr(args, key, config[section][key])
+    # TODO support accumulated arguments
+
+
def _parse_cfg(args, driver_name):
"""parse configuration file. If the option is directly given to the command line,
- the option is processed prior to the configuration file."""
+ the option is processed prior to the configuration file.
+ That is, if the values parsed from the configuration file already exist in args,
+ the values are ignored."""
if _is_valid_attr(args, 'config'):
config = configparser.ConfigParser()
config.optionxform = str
@@ -290,3 +317,54 @@ def _run(cmd, err_prefix=None, logfile=None):
logfile.write(line)
if p.returncode != 0:
sys.exit(p.returncode)
+
+
+def _remove_prefix(str, prefix):
+    """return `str` without leading `prefix`; `str` itself if it does not start with it"""
+    return str[len(prefix):] if str.startswith(prefix) else str
+
+
+def _remove_suffix(str, suffix):
+    """return `str` without trailing `suffix`; `str` itself if it does not end with it"""
+    # an empty suffix must be rejected here: str[:-0] is str[:0], i.e. ''
+    if suffix and str.endswith(suffix):
+        return str[:-len(suffix)]
+    return str
+
+
+def _get_optimization_list(get_name=False):
+    """
+    returns a list of optimization. If `get_name` is True,
+    only basename without extension is returned rather than full file path.
+
+    [one hierarchy]
+    one
+    ├── backends
+    ├── bin
+    ├── doc
+    ├── include
+    ├── lib
+    ├── optimization
+    └── test
+
+    Optimization options must be placed in `optimization` folder
+    """
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+
+    # optimization folder
+    files = glob.glob(dir_path + '/../optimization/O*.cfg')
+    # exclude if the name has space
+    # NOTE only the file name is inspected; a space in the directory part of
+    #      the path must not hide every optimization
+    files = [s for s in files if ' ' not in ntpath.basename(s)]
+
+    # keep only regular files that can be read
+    opt_list = [f for f in files if os.path.isfile(f) and os.access(f, os.R_OK)]
+
+    if get_name:
+        # NOTE the name includes prefix 'O'
+        # e.g. O1, O2, ONCHW not just 1, 2, NCHW
+        opt_list = [_remove_suffix(ntpath.basename(f), '.cfg') for f in opt_list]
+
+    return opt_list
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index 7028bd9ac..ada5ff5d1 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -207,7 +207,7 @@ struct CookParams
std::string noname;
};
-template <typename T> void cook_graph(const T &graph, CookParams &cp)
+template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, CookParams &cp)
{
LOGGER(l);
@@ -537,6 +537,8 @@ template <typename T> void cook_graph(const T &graph, CookParams &cp)
subgraph_builder.add_name(name);
subgraph_vec.emplace_back(subgraph_builder.Finish());
+
+ return symbol_table;
}
} // namespace
@@ -574,6 +576,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
// Operation-related
std::vector<flatbuffers::Offset<::tflite::OperatorCode>> code_vec;
+ // SignatureDef-related
+ std::vector<flatbuffers::Offset<::tflite::SignatureDef>> signdef_vec;
+
// Graphs-related
std::vector<flatbuffers::Offset<::tflite::SubGraph>> subgraph_vec;
@@ -617,13 +622,18 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
buffer_vec.emplace_back(buffer_builder.Finish());
}
+ // symbol_tables stores symbol_table of each sub graph
+ // this is used to find tensor ID(index) with tensor name
+ std::vector<std::map<std::string, int32_t>> symbol_tables;
+
//
// Create Main graph
//
CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
builtin_code_map, custom_code_vec, "main"};
- cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
+ auto table = cook_graph<::tflchef::ModelRecipe>(model_recipe, cp);
+ symbol_tables.push_back(table);
//
// Create subgraphs if exist
@@ -638,11 +648,97 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
CookParams cp{buffer_vec, code_vec, subgraph_vec, flatbuffer_builder,
builtin_code_map, custom_code_vec, stringStream.str()};
- cook_graph<::tflchef::Graph>(graph, cp);
+ auto table = cook_graph<::tflchef::Graph>(graph, cp);
+ symbol_tables.push_back(table);
+ }
+
+ // Create Signature-Def
+ //
+ for (int s = 0; s < model_recipe.signature_def_size(); ++s)
+ {
+ // load from recipe
+ const auto &rec_signature_def = model_recipe.signature_def(s);
+
+ std::vector<flatbuffers::Offset<::tflite::TensorMap>> tensormap_inputs;
+ std::vector<flatbuffers::Offset<::tflite::TensorMap>> tensormap_outputs;
+
+ // which subgraph index to cook
+ auto subgraph_index = 0;
+ if (rec_signature_def.has_subgraph_index())
+ {
+ subgraph_index = rec_signature_def.subgraph_index();
+ }
+ assert(subgraph_index < symbol_tables.size());
+ auto &symbol_table = symbol_tables[subgraph_index];
+
+ // cook for inputs
+ for (int si = 0; si < rec_signature_def.inputs_size(); ++si)
+ {
+ // recipe for input TensorMap
+ auto rec_tm_input = rec_signature_def.inputs(si);
+ auto name = flatbuffer_builder->CreateString(rec_tm_input.name());
+ uint32_t tensor_index = 0;
+ // either tensor or tensor_index should exist
+ assert(rec_tm_input.has_tensor() || rec_tm_input.has_tensor_index());
+ if (rec_tm_input.has_tensor())
+ {
+ // we can get tensor_index from symbol_table
+ auto tensor = rec_tm_input.tensor();
+ tensor_index = symbol_table[tensor];
+ }
+ else
+ {
+ // or we can use tensor_index itself
+ tensor_index = rec_tm_input.tensor_index();
+ }
+
+ ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder};
+ tensormap_builder.add_name(name);
+ tensormap_builder.add_tensor_index(tensor_index);
+ tensormap_inputs.push_back(tensormap_builder.Finish());
+ }
+ // cook for outputs, same as inputs
+ for (int so = 0; so < rec_signature_def.outputs_size(); ++so)
+ {
+ auto rec_tm_output = rec_signature_def.outputs(so);
+ auto name = flatbuffer_builder->CreateString(rec_tm_output.name());
+ uint32_t tensor_index = 0;
+ assert(rec_tm_output.has_tensor() || rec_tm_output.has_tensor_index());
+ if (rec_tm_output.has_tensor())
+ {
+ auto tensor = rec_tm_output.tensor();
+ tensor_index = symbol_table[tensor];
+ }
+ else
+ {
+ tensor_index = rec_tm_output.tensor_index();
+ }
+
+ ::tflite::TensorMapBuilder tensormap_builder{*flatbuffer_builder};
+ tensormap_builder.add_name(name);
+ tensormap_builder.add_tensor_index(tensor_index);
+ tensormap_outputs.push_back(tensormap_builder.Finish());
+ }
+
+ auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs);
+ auto outputs = flatbuffer_builder->CreateVector(tensormap_outputs);
+ auto method_name = flatbuffer_builder->CreateString(rec_signature_def.method_name());
+ auto key = flatbuffer_builder->CreateString(rec_signature_def.key());
+ // TODO add validation for method_name and key
+
+ ::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder};
+ signature_def_builder.add_inputs(inputs);
+ signature_def_builder.add_outputs(outputs);
+ signature_def_builder.add_method_name(method_name);
+ signature_def_builder.add_key(key);
+ signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index());
+
+ signdef_vec.emplace_back(signature_def_builder.Finish());
}
// Create "Model" arguments
auto buffers = flatbuffer_builder->CreateVector(buffer_vec);
+ auto signdefs = flatbuffer_builder->CreateVector(signdef_vec);
auto operator_codes = flatbuffer_builder->CreateVector(code_vec);
auto subgraphs = flatbuffer_builder->CreateVector(subgraph_vec);
auto description = flatbuffer_builder->CreateString("Generated by tflchef");
@@ -652,6 +748,7 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
model_builder.add_version(3);
model_builder.add_operator_codes(operator_codes);
+ model_builder.add_signature_defs(signdefs);
model_builder.add_subgraphs(subgraphs);
model_builder.add_description(description);
model_builder.add_buffers(buffers);
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 34d50d985..4162cb123 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -647,6 +647,22 @@ message Operation {
// use the number not listed in the above reserve list
}
+// Recipe for one TensorMap entry (an input or an output) of a SignatureDef.
+message TensorMap {
+ // name written to the TensorMap of the generated model
+ optional string name = 4;
+ // use tensor as name of the Operand or use tensor_index as order number.
+ // either one should exist.
+ // when both are given, tflchef looks up tensor and ignores tensor_index.
+ optional string tensor = 5;
+ optional uint32 tensor_index = 6;
+}
+
+// Recipe for one SignatureDef entry of the generated model.
+message SignatureDef {
+ repeated TensorMap inputs = 4;
+ repeated TensorMap outputs = 5;
+ optional string method_name = 6;
+ optional string key = 10;
+ // subgraph whose tensors the inputs/outputs refer to;
+ // tflchef resolves tensor names in subgraph 0 when this is not set
+ optional uint32 subgraph_index = 12;
+}
+
// For additional subgraphs
message Graph {
repeated Operand operand = 1;
@@ -664,4 +680,5 @@ message ModelRecipe {
optional string name = 5;
optional uint32 version = 6 [default = 1];
repeated Graph graph = 7;
+ repeated SignatureDef signature_def = 8;
}
diff --git a/compiler/tflchef/tests/signature_def_index/test.recipe b/compiler/tflchef/tests/signature_def_index/test.recipe
new file mode 100644
index 000000000..4481752ef
--- /dev/null
+++ b/compiler/tflchef/tests/signature_def_index/test.recipe
@@ -0,0 +1,60 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm1"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm2"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm3"
+}
+signature_def {
+ inputs: {
+ name: "ifm"
+ tensor_index: 0
+ }
+ outputs {
+ name: "ofm2"
+ tensor_index: 2
+ }
+ outputs {
+ name: "ofm3"
+ tensor_index: 3
+ }
+ outputs {
+ name: "ofm1"
+ tensor_index: 1
+ }
+ method_name: "serving_default"
+ key: "serv"
+ subgraph_index: 0
+}
+input: "ifm"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/compiler/tflchef/tests/signature_def_name/test.recipe b/compiler/tflchef/tests/signature_def_name/test.recipe
new file mode 100644
index 000000000..79be25138
--- /dev/null
+++ b/compiler/tflchef/tests/signature_def_name/test.recipe
@@ -0,0 +1,60 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm1"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm2"
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm3"
+}
+signature_def {
+ inputs: {
+ name: "ifm"
+ tensor_index: 0
+ }
+ outputs {
+ name: "out2"
+ tensor: "ofm2"
+ }
+ outputs {
+ name: "out3"
+ tensor: "ofm3"
+ }
+ outputs {
+ name: "out1"
+ tensor: "ofm1"
+ }
+ method_name: "serving_default"
+ key: "serv"
+ subgraph_index: 0
+}
+input: "ifm"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index 7a480bc52..2351e4c3d 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -405,7 +405,7 @@ void dump_model(std::ostream &os, const tflite::Model *model)
for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
{
auto sign_i = signaturedefs->Get(i);
- os << "S(" << i << ") " << sign_i->method_name()->c_str() << ", key("
+ os << "S(" << i << ") method_name(" << sign_i->method_name()->c_str() << "), key("
<< sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")"
<< std::endl;
@@ -413,16 +413,18 @@ void dump_model(std::ostream &os, const tflite::Model *model)
for (uint32_t t = 0; t < inputs_i->Length(); ++t)
{
auto inputs_i_t = inputs_i->Get(t);
- os << " I T(" << t << ") " << inputs_i_t->name()->c_str() << ": "
- << inputs_i_t->tensor_index() << std::endl;
+ os << " I(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") "
+ << inputs_i_t->name()->c_str() << std::endl;
}
auto outputs_i = sign_i->outputs();
for (uint32_t t = 0; t < outputs_i->Length(); ++t)
{
auto outputs_i_t = outputs_i->Get(t);
- os << " O T(" << t << ") " << outputs_i_t->name()->c_str() << ": "
- << outputs_i_t->tensor_index() << std::endl;
+ os << " O(" << t << ")"
+ << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") "
+ << outputs_i_t->name()->c_str() << std::endl;
}
}
os << std::endl;
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
index 4015631ab..fb8c211b6 100644
--- a/compiler/tflite2circle/driver/Driver.cpp
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -80,7 +80,10 @@ int entry(int argc, char **argv)
auto flatbuffer_builder = std::make_unique<flatbuffers::FlatBufferBuilder>(1024);
// convert tflite to circle
- tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model.get_model()};
+ tflite2circle::CircleModel circle_model{flatbuffer_builder};
+
+ circle_model.load_offsets(tfl_model.get_model());
+ circle_model.model_build();
std::ofstream outfile{circle_path, std::ios::binary};
diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h
index 14c4f1c12..189cfaff2 100644
--- a/compiler/tflite2circle/include/CircleModel.h
+++ b/compiler/tflite2circle/include/CircleModel.h
@@ -60,14 +60,17 @@ template <typename T> class Offset
private:
using TFLFlatBufVec = flatbuffers::Vector<typename T::TFL>;
using CIRFlatBufVecOffset = flatbuffers::Offset<flatbuffers::Vector<typename T::CIR>>;
+ using SignatureDefs = flatbuffers::Vector<flatbuffers::Offset<::tflite::SignatureDef>>;
public:
Offset(void) = delete;
Offset(FlatBufBuilder &fb) : _fb{fb} {};
public:
- // TODO use _fb
- void build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
+ void set_signature_defs(const SignatureDefs *offset) { _tfl_signature_def_offsets = offset; }
+
+public:
+ void build(const TFLFlatBufVec *tflite_flatbuffer_vec);
public:
CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; }
@@ -75,6 +78,8 @@ public:
private:
FlatBufBuilder &_fb;
CIRFlatBufVecOffset _circle_flatbuffer_vec_offset;
+ // TODO revise this when Circle supports SignatureDef
+ const SignatureDefs *_tfl_signature_def_offsets = nullptr;
};
class CircleModel
@@ -84,9 +89,10 @@ private:
public:
CircleModel(void) = delete;
- CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model);
+ CircleModel(FlatBufBuilder &fb);
public:
+ void load_offsets(const tflite::Model *tfl_model);
void model_build(void) const;
const char *base(void) const;
size_t size(void) const;
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index 4249f1560..90cc415ff 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -24,19 +24,16 @@
namespace tflite2circle
{
-template <>
-void Offset<MetaDataBufferLink>::build(FlatBufBuilder &fb,
- const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<MetaDataBufferLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
if (tflite_flatbuffer_vec == nullptr)
return;
std::vector<int32_t> metadata_buffer_vec{tflite_flatbuffer_vec->begin(),
tflite_flatbuffer_vec->end()};
- _circle_flatbuffer_vec_offset = fb->CreateVector(metadata_buffer_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(metadata_buffer_vec);
}
-template <>
-void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<BufferLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec;
@@ -46,21 +43,22 @@ void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_f
if (it->data())
{
std::vector<uint8_t> data_vec{it->data()->begin(), it->data()->end()};
- buffer_data = fb->CreateVector(data_vec);
+ buffer_data = _fb->CreateVector(data_vec);
}
- circle::BufferBuilder circle_buffer_builder{*fb};
+ circle::BufferBuilder circle_buffer_builder{*_fb};
circle_buffer_builder.add_data(buffer_data);
auto circle_buffers = circle_buffer_builder.Finish();
buffers_vec.emplace_back(circle_buffers);
}
- _circle_flatbuffer_vec_offset = fb->CreateVector(buffers_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(buffers_vec);
}
-template <>
-void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec;
+ int32_t subgraph_index = 0;
+
for (auto it_sg : *tflite_flatbuffer_vec)
{
// tensors of subgraph
@@ -74,12 +72,12 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
if (it->shape())
{
auto shape_vec = std::vector<int32_t>({it->shape()->begin(), it->shape()->end()});
- shape = fb->CreateVector(shape_vec);
+ shape = _fb->CreateVector(shape_vec);
}
// name
flatbuffers::Offset<flatbuffers::String> name;
if (it->name())
- name = fb->CreateString(it->name()->str());
+ name = _fb->CreateString(it->name()->str());
// quantization
flatbuffers::Offset<circle::QuantizationParameters> quantization;
if (it->quantization())
@@ -100,8 +98,8 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
auto rmax = it->quantization()->max();
tfmin = std::vector<float>{rmin->begin(), rmin->end()};
tfmax = std::vector<float>{rmax->begin(), rmax->end()};
- min = fb->CreateVector(tfmin);
- max = fb->CreateVector(tfmax);
+ min = _fb->CreateVector(tfmin);
+ max = _fb->CreateVector(tfmax);
}
if (it->quantization()->scale() && it->quantization()->zero_point())
@@ -110,11 +108,11 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
auto rz = it->quantization()->zero_point();
tfscale = std::vector<float>{rs->begin(), rs->end()};
tfzerop = std::vector<int64_t>{rz->begin(), rz->end()};
- scale = fb->CreateVector(tfscale);
- zero_point = fb->CreateVector(tfzerop);
+ scale = _fb->CreateVector(tfscale);
+ zero_point = _fb->CreateVector(tfzerop);
}
- quantization = circle::CreateQuantizationParameters(*fb, min, max, scale, zero_point,
+ quantization = circle::CreateQuantizationParameters(*_fb, min, max, scale, zero_point,
circle::QuantizationDetails_NONE, 0,
quantized_dimension);
}
@@ -135,7 +133,7 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
{
auto traversal_order_vec = std::vector<int32_t>{
it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
- traversal_order = fb->CreateVector(traversal_order_vec);
+ traversal_order = _fb->CreateVector(traversal_order_vec);
}
// block_map
@@ -143,7 +141,7 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
{
auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
it->sparsity()->block_map()->end()};
- block_map = fb->CreateVector(block_map_vec);
+ block_map = _fb->CreateVector(block_map_vec);
}
// dim_metadata
@@ -154,18 +152,18 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
// array_segments
auto tflite_array_segments_type = it->array_segments_type();
auto circle_array_segments =
- get_circle_sparse_index_vector(*fb, it->array_segments(), tflite_array_segments_type);
+ get_circle_sparse_index_vector(*_fb, it->array_segments(), tflite_array_segments_type);
auto circle_array_segments_type =
get_circle_sparse_index_vector_type(tflite_array_segments_type);
// array_indices
auto tflite_array_indices_type = it->array_indices_type();
auto circle_array_indices =
- get_circle_sparse_index_vector(*fb, it->array_indices(), tflite_array_indices_type);
+ get_circle_sparse_index_vector(*_fb, it->array_indices(), tflite_array_indices_type);
auto circle_array_indices_type =
get_circle_sparse_index_vector_type(tflite_array_indices_type);
- auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+ auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*_fb};
circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format()));
circle_dim_metadata_builder.add_dense_size(it->dense_size());
@@ -176,9 +174,9 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
auto dim_metadata = circle_dim_metadata_builder.Finish();
dim_metadata_vec.emplace_back(dim_metadata);
}
- dim_metadata = fb->CreateVector(dim_metadata_vec);
+ dim_metadata = _fb->CreateVector(dim_metadata_vec);
- sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+ sparsity = circle::CreateSparsityParameters(*_fb, traversal_order, block_map, dim_metadata);
}
// shape signature
@@ -187,10 +185,10 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
{
auto shape_signature_vec =
std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
- shape_signature = fb->CreateVector(shape_signature_vec);
+ shape_signature = _fb->CreateVector(shape_signature_vec);
}
- circle::TensorBuilder tensor_builder{*fb};
+ circle::TensorBuilder tensor_builder{*_fb};
tensor_builder.add_shape(shape);
tensor_builder.add_type(get_circle_tensortype(it->type()));
tensor_builder.add_buffer(it->buffer());
@@ -202,19 +200,56 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
auto tensor = tensor_builder.Finish();
tensor_vec.emplace_back(tensor);
}
- auto circle_tensors = fb->CreateVector(tensor_vec);
+ auto circle_tensors = _fb->CreateVector(tensor_vec);
// inputs of subgraph
auto tflite_inputs = it_sg->inputs();
std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()};
- auto circle_inputs = fb->CreateVector(input_vec);
+ // apply signature_def to the input tensor indices so that the input order is correct
+ // NOTE this becomes unnecessary once the circle format supports signature_def
+ if (_tfl_signature_def_offsets != nullptr)
+ {
+ for (auto it_signdef : *_tfl_signature_def_offsets)
+ {
+ if (it_signdef->subgraph_index() == subgraph_index)
+ {
+ auto inputs = it_signdef->inputs();
+ assert(inputs->size() == input_vec.size());
+ uint32_t input_vec_idx = 0;
+ for (auto it_tm : *inputs)
+ {
+ input_vec[input_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+ }
+ }
+ }
+ }
+
+ auto circle_inputs = _fb->CreateVector(input_vec);
// outputs of subgraph
auto tflite_outputs = it_sg->outputs();
std::vector<int32_t> output_vec{tflite_outputs->begin(), tflite_outputs->end()};
- auto circle_outputs = fb->CreateVector(output_vec);
+ if (_tfl_signature_def_offsets != nullptr)
+ {
+ // apply signature_def to the output tensor indices so that the output order is correct
+ for (auto it_signdef : *_tfl_signature_def_offsets)
+ {
+ if (it_signdef->subgraph_index() == subgraph_index)
+ {
+ auto outputs = it_signdef->outputs();
+ assert(outputs->size() == output_vec.size());
+ uint32_t output_vec_idx = 0;
+ for (auto it_tm : *outputs)
+ {
+ output_vec[output_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+ }
+ }
+ }
+ }
+
+ auto circle_outputs = _fb->CreateVector(output_vec);
// operators of subgraph
std::vector<flatbuffers::Offset<circle::Operator>> operator_vec;
@@ -226,12 +261,12 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
{
// inputs
std::vector<int32_t> input_vec{it->inputs()->begin(), it->inputs()->end()};
- auto circle_inputs = fb->CreateVector(input_vec);
+ auto circle_inputs = _fb->CreateVector(input_vec);
// outputs
std::vector<int32_t> output_vec{it->outputs()->begin(), it->outputs()->end()};
- auto circle_outputs = fb->CreateVector(output_vec);
+ auto circle_outputs = _fb->CreateVector(output_vec);
// builtin options
- auto circle_builtin_options = get_circle_builtin_options(*fb, it);
+ auto circle_builtin_options = get_circle_builtin_options(*_fb, it);
auto circle_builtin_options_type = get_circle_builtin_options_type(it);
// custom options
flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
@@ -239,14 +274,14 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
{
std::vector<uint8_t> custom_options_vec{it->custom_options()->begin(),
it->custom_options()->end()};
- circle_custom_options = fb->CreateVector(custom_options_vec);
+ circle_custom_options = _fb->CreateVector(custom_options_vec);
}
// custom options format
// TODO Make get_circle_custom_options_format
assert(it->custom_options_format() == tflite::CustomOptionsFormat_FLEXBUFFERS);
auto circle_custom_options_format = circle::CustomOptionsFormat_FLEXBUFFERS;
- circle::OperatorBuilder operator_builder{*fb};
+ circle::OperatorBuilder operator_builder{*_fb};
operator_builder.add_opcode_index(it->opcode_index());
operator_builder.add_inputs(circle_inputs);
operator_builder.add_outputs(circle_outputs);
@@ -259,13 +294,13 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
operator_vec.emplace_back(opeartor);
}
}
- auto circle_operators = fb->CreateVector(operator_vec);
+ auto circle_operators = _fb->CreateVector(operator_vec);
// name of subgraph
- auto subgraphs_name = fb->CreateString(it_sg->name());
+ auto subgraphs_name = _fb->CreateString(it_sg->name());
// subgraphs
- auto circle_subgraph_builder = circle::SubGraphBuilder{*fb};
+ auto circle_subgraph_builder = circle::SubGraphBuilder{*_fb};
circle_subgraph_builder.add_tensors(circle_tensors);
circle_subgraph_builder.add_inputs(circle_inputs);
@@ -276,8 +311,11 @@ void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
auto circle_subgraph = circle_subgraph_builder.Finish();
subgprahs_vec.emplace_back(circle_subgraph);
+
+ // next subgraph
+ subgraph_index = subgraph_index + 1;
}
- _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec);
}
tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
@@ -291,15 +329,14 @@ tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
return opcode->builtin_code();
}
-template <>
-void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
for (auto it : *tflite_flatbuffer_vec)
{
- auto custom_code = fb->CreateString(it->custom_code());
- circle::OperatorCodeBuilder operator_code_builder{*fb};
+ auto custom_code = _fb->CreateString(it->custom_code());
+ circle::OperatorCodeBuilder operator_code_builder{*_fb};
// TODO support circle deprecated_builtin_code
auto bt_code = builtin_code_neutral(it);
operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
@@ -308,23 +345,28 @@ void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tf
auto code = operator_code_builder.Finish();
operator_code_vec.emplace_back(code);
}
- _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec);
+ _circle_flatbuffer_vec_offset = _fb->CreateVector(operator_code_vec);
}
-CircleModel::CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model)
+CircleModel::CircleModel(FlatBufBuilder &fb)
: _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb}
{
- _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(fb);
- _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb);
- _buffers_offset = std::make_unique<Offset<BufferLink>>(fb);
- _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(fb);
+ // NOTHING TO DO
+}
+
+void CircleModel::load_offsets(const tflite::Model *tfl_model)
+{
+ _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(_fb);
+ _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(_fb);
+ _buffers_offset = std::make_unique<Offset<BufferLink>>(_fb);
+ _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(_fb);
- _operator_codes_offset->build(fb, tfl_model->operator_codes());
- _subGraphs_offset->build(fb, tfl_model->subgraphs());
- _buffers_offset->build(fb, tfl_model->buffers());
- _metadata_buffer_offset->build(fb, tfl_model->metadata_buffer());
+ _subGraphs_offset->set_signature_defs(tfl_model->signature_defs());
- model_build();
+ _operator_codes_offset->build(tfl_model->operator_codes());
+ _subGraphs_offset->build(tfl_model->subgraphs());
+ _buffers_offset->build(tfl_model->buffers());
+ _metadata_buffer_offset->build(tfl_model->metadata_buffer());
}
void CircleModel::model_build(void) const
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index 50ee05242..2241c9ec9 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000120001)
+ set(VCONONE_VERSION 0x0000000000130001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/docs/conf.py b/docs/conf.py
index b59cab878..ff4070f98 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.18.0'
+release = '1.19.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/release/1.19/index.rst b/docs/release/1.19/index.rst
new file mode 100644
index 000000000..c80782ce8
--- /dev/null
+++ b/docs/release/1.19/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Nov 10 15:21:13 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.19
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.19.0.md
diff --git a/docs/release/1.19/release-note-1.19.0.md b/docs/release/1.19/release-note-1.19.0.md
new file mode 100644
index 000000000..e63d8706c
--- /dev/null
+++ b/docs/release/1.19/release-note-1.19.0.md
@@ -0,0 +1,8 @@
+# Release Note 1.19.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- `circle-quantizer` supports input/output type option
+- Introduce configuration file for optimization options
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
index b48239f2a..99118c5d9 100644
--- a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
@@ -3,8 +3,10 @@ function(_CMSISSource_import)
nnas_include(OptionTools)
envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+ set(CMSIS_5_8_0_SHA256 fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad)
- ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL})
+ ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}
+ CHECKSUM "SHA256=${CMSIS_5_8_0_SHA256}")
set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE)
set(CMSISSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
index 0eb8eb91c..8b0a602cb 100644
--- a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
@@ -27,8 +27,9 @@ function(_FlatBuffers_build)
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix4"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+ IDENTIFIER "1.10-fix6"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+ "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
PKG_NAME "FLATBUFFERS-1.10")
endfunction(_FlatBuffers_build)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
index daa749c58..06366db63 100644
--- a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
@@ -27,8 +27,9 @@ function(_FlatBuffers_build)
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build
INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.12-fix1"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+ IDENTIFIER "1.12-fix3"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+ "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
PKG_NAME "FLATBUFFERS-1.12")
endfunction(_FlatBuffers_build)
diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake
index e551e29c8..8b0a602cb 100644
--- a/infra/cmake/packages/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffersConfig.cmake
@@ -27,8 +27,9 @@ function(_FlatBuffers_build)
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix4"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+ IDENTIFIER "1.10-fix6"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+ "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
PKG_NAME "FLATBUFFERS-1.10")
endfunction(_FlatBuffers_build)
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
index 8b1743066..805554538 100644
--- a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
@@ -3,8 +3,10 @@ function(_MbedOSSource_import)
nnas_include(OptionTools)
envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+ set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e)
- ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL})
+ ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}
+ CHECKSUM "SHA256=${MBEDOS_6_15_SHA256}")
set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE)
set(MbedOSSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog
index 12af5f928..2763ac55b 100644
--- a/infra/debian/compiler/changelog
+++ b/infra/debian/compiler/changelog
@@ -1,3 +1,10 @@
+one (1.19.0) bionic; urgency=medium
+
+ * `circle-quantizer` supports input/output type option
+ * Introduce configuration file for optimization options
+
+ -- seongwoo <mhs4670go@naver.com> Wed, 10 Nov 2021 15:53:39 +0900
+
one (1.18.0) bionic; urgency=medium
* More optimization pass
diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog
index ee0d3e6ee..4cf0abc30 100644
--- a/infra/debian/runtime/changelog
+++ b/infra/debian/runtime/changelog
@@ -1,3 +1,9 @@
+one (1.19.0) bionic; urgency=low
+
+ * Synch up version with ONE Compiler
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Wed, 10 Nov 2021 14:23:00 +0900
+
one (1.18.0) bionic; urgency=low
* Synch up version with ONE Compiler
diff --git a/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
new file mode 100644
index 000000000..544be030a
--- /dev/null
+++ b/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake
@@ -0,0 +1,66 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
+
+set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
+
+# Convert TARGET_CPU=Cortex-M33+nofp+nodsp into
+# - CMAKE_SYSTEM_PROCESSOR=cortex-m33
+# - TARGET_CPU_FEATURES=nofp;nodsp
+string(REPLACE "+" ";" TARGET_CPU_FEATURES ${TARGET_CPU})
+list(POP_FRONT TARGET_CPU_FEATURES CMAKE_SYSTEM_PROCESSOR)
+string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR)
+
+set(CMAKE_EXECUTABLE_SUFFIX ".elf")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Select C/C++ version
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Compile options
+add_compile_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ "$<$<CONFIG:DEBUG>:-gdwarf-3>"
+ "$<$<COMPILE_LANGUAGE:CXX>:-funwind-tables;-frtti;-fexceptions>")
+
+# Compile definitions
+add_compile_definitions(
+ "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")
+
+# Link options
+add_link_options(
+ -mcpu=${TARGET_CPU}
+ -mthumb
+ --specs=nosys.specs)
+
+# Set floating point unit
+if("${TARGET_CPU}" MATCHES "\\+fp")
+ set(FLOAT hard)
+elseif("${TARGET_CPU}" MATCHES "\\+nofp")
+ set(FLOAT soft)
+elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR
+ "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55")
+ set(FLOAT hard)
+else()
+ set(FLOAT soft)
+endif()
+
+if (FLOAT)
+ add_compile_options(-mfloat-abi=${FLOAT})
+ add_link_options(-mfloat-abi=${FLOAT})
+endif()
+
+# Compilation warnings
+add_compile_options(
+ -Wno-all
+)
diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
index 4761e848c..6ae7dea34 100644
--- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
+++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
@@ -125,6 +125,13 @@ function(_ARMCompute_Build ARMComputeInstall_DIR)
list(APPEND SCONS_OPTIONS "Werror=0")
list(APPEND SCONS_OPTIONS "os=${TARGET_OS}")
+ #### Disable test build
+ list(APPEND SCONS_OPTIONS "benchmark_tests=0")
+ list(APPEND SCONS_OPTIONS "validation_tests=0")
+ list(APPEND SCONS_OPTIONS "benchmark_examples=0")
+ list(APPEND SCONS_OPTIONS "validate_examples=0")
+ list(APPEND SCONS_OPTIONS "reference_openmp=0")
+
if(DEFINED EXTERNALS_BUILD_THREADS)
set(N ${EXTERNALS_BUILD_THREADS})
else(DEFINED EXTERNALS_BUILD_THREADS)
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 4133d7a06..547d46a0d 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,6 +1,6 @@
Name: nnfw
Summary: nnfw
-Version: 1.18.0
+Version: 1.19.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
diff --git a/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
new file mode 100644
index 000000000..2cd7b9065
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Conv2D_005/test.recipe
@@ -0,0 +1,34 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: ""
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
new file mode 100644
index 000000000..ead0c33ad
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Part_Tanh_FC_nobias/test.recipe
@@ -0,0 +1,42 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "Tanh"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operation {
+ type: "Tanh"
+ input: "in"
+ output: "Tanh"
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "Tanh"
+ input: "weight"
+ input: ""
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
new file mode 100644
index 000000000..ae993e6d8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
@@ -0,0 +1,82 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm3"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm1"
+ add_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm2"
+ mul_options {
+ activation: 0
+ }
+}
+operation {
+ type: "Sub"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm3"
+ sub_options {
+ activation: 0
+ }
+}
+signature_def {
+ inputs: {
+ name: "ifm1"
+ tensor_index: 0
+ }
+ inputs: {
+ name: "ifm2"
+ tensor_index: 1
+ }
+ outputs {
+ name: "ofm2"
+ tensor_index: 3
+ }
+ outputs {
+ name: "ofm3"
+ tensor_index: 4
+ }
+ outputs {
+ name: "ofm1"
+ tensor_index: 2
+ }
+ method_name: "serving_default"
+ key: "serv"
+ subgraph_index: 0
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index b432929b5..9b8840fc2 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.18.0"
+ versionName "1.19.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 4fce291a0..b885a6b90 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001200
+#define NNFW_VERSION 0x01001300
#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc
index a7185ca0b..514c01485 100644
--- a/runtime/onert/test/core/compiler/HEScheduler.cc
+++ b/runtime/onert/test/core/compiler/HEScheduler.cc
@@ -351,14 +351,19 @@ protected:
std::string _original_profiling_mode;
};
+//
+// HEScheduler tests
+//
+
class HESchedulerTestWithExecutorParam : public HESchedulerTest,
public testing::WithParamInterface<std::string>
{
};
-//
-// HEScheduler tests
-//
+// HESchedulerTestWithExecutorParam tests are parameterized with the executor name and run three
+// times - once for each executor
+INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
@@ -490,11 +495,6 @@ TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
}
}
-// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
-// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
- testing::Values(LINEAR, DATAFLOW, PARALLEL));
-
// Test scheduler behavior for branched graph and enabled profiling mode
TEST_F(HESchedulerTest, branched_graph_profiling_mode)
{