summaryrefslogtreecommitdiff
path: root/compiler/luci/pass/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/luci/pass/src')
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp121
-rw-r--r--compiler/luci/pass/src/CircleOptimizerUtils.cpp89
-rw-r--r--compiler/luci/pass/src/CircleOptimizerUtils.h42
-rw-r--r--compiler/luci/pass/src/FuseBCQPass.cpp405
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.cpp231
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.test.cpp64
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPassInternal.h28
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.cpp172
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.h38
-rw-r--r--compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp495
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp551
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpAddPass.cpp124
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp69
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp185
14 files changed, 2588 insertions, 26 deletions
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index dcb05a0b5..90fbe9009 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -16,16 +16,23 @@
#include "luci/CircleOptimizer.h"
+#include "luci/Pass/FuseBCQPass.h"
#include "luci/Pass/FuseInstanceNormPass.h"
+#include "luci/Pass/ResolveCustomOpAddPass.h"
+#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
+#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
// TODO add more passes
#include "luci/Pass/ShapeInferencePass.h"
#include "luci/Pass/TypeInferencePass.h"
// logo passes
-#include <logo/RemoveDeadNodePass.h>
+#include <logo/RemoveDeadNodeWithQueryPass.h>
#include "ProgressReporter.h"
+#include "CircleOptimizerUtils.h"
#include <logo/Phase.h>
@@ -36,18 +43,39 @@ namespace
using namespace luci;
-class OptimizeOptionsImpl : public luci::CircleOptimizer::Options
+class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
{
public:
void enable(Algorithm) final;
+ void param(AlgorithmParameters, const std::string &) final;
+ const std::string param(AlgorithmParameters) const final;
bool query(Algorithm) final;
private:
std::vector<Algorithm> _algorithms;
+ std::map<AlgorithmParameters, const std::string> _algorithm_params;
};
void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
+void OptimizeOptionsImpl::param(AlgorithmParameters param, const std::string &str)
+{
+ _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
+}
+
+const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const
+{
+ auto param_str = _algorithm_params.find(param);
+ if (param_str != _algorithm_params.end())
+ {
+ return param_str->second;
+ }
+ else
+ {
+ return std::string();
+ }
+}
+
bool OptimizeOptionsImpl::query(Algorithm algo)
{
std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
@@ -77,14 +105,31 @@ void CircleOptimizer::optimize(loco::Graph *g) const
logo::Phase phase;
/* TRANSFORM DECLARATION BEGIN */
+ if (_options->query(Options::Algorithm::ResolveCustomOpAdd))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>());
+ }
+ if (_options->query(Options::Algorithm::ResolveCustomOpBatchMatMul))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpBatchMatMulPass>());
+ }
+ if (_options->query(Options::Algorithm::ResolveCustomOpMatMul))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMatMulPass>());
+ }
if (_options->query(Options::Algorithm::FuseInstanceNorm))
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
}
+ if (_options->query(Options::Algorithm::FuseBCQ))
+ {
+ phase.emplace_back(std::make_unique<FuseBCQPass>());
+ }
+
// Shape inference is needed for added nodes doing above transformations
phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
- phase.emplace_back(std::make_unique<logo::RemoveDeadNodePass>());
+ phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
/* TRANSFORM DECLARATION END */
ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
@@ -93,4 +138,74 @@ void CircleOptimizer::optimize(loco::Graph *g) const
phase_runner.run(phase);
}
+void CircleOptimizer::quantize(loco::Graph *g) const
+{
+ // Fake quantization of weights
+ if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
+ {
+ static const std::vector<std::string> fakeq_supported_input_dtype{"float32"};
+ static const std::vector<std::string> fakeq_supported_output_dtype{"uint8"};
+ static const std::vector<std::string> fakeq_supported_granularity{"layer"};
+
+ auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
+ auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+
+ if (!in_array(to_lower_case(input_dtype), fakeq_supported_input_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input type: " +
+ to_string(fakeq_supported_input_dtype));
+
+ if (!in_array(to_lower_case(output_dtype), fakeq_supported_output_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output type: " +
+ to_string(fakeq_supported_output_dtype));
+
+ if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(fakeq_supported_granularity));
+
+ luci::QuantizeDequantizeWeightsPass fake_quantizer(
+ str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
+ fake_quantizer.run(g);
+ }
+
+ // Actual quantization of weights, bias, and activation
+ if (_options->query(Options::Algorithm::QuantizeWithMinMax))
+ {
+ static const std::vector<std::string> qwmm_supported_input_dtype{"float32"};
+ static const std::vector<std::string> qwmm_supported_output_dtype{"uint8"};
+ static const std::vector<std::string> qwmm_supported_granularity{"layer"};
+
+ auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
+ auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+ auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+
+ if (!in_array(to_lower_case(input_dtype), qwmm_supported_input_dtype))
+ throw std::runtime_error("Unsupported input type. List of supported input types: " +
+ to_string(qwmm_supported_input_dtype));
+
+ if (!in_array(to_lower_case(output_dtype), qwmm_supported_output_dtype))
+ throw std::runtime_error("Unsupported output type. List of supported output types: " +
+ to_string(qwmm_supported_output_dtype));
+
+ if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
+ throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+ to_string(qwmm_supported_granularity));
+
+ luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype),
+ str_to_granularity(granularity));
+ quantizer.run(g);
+ }
+
+ logo::Phase phase;
+
+ // Do Shape/Type inference
+ phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
+ phase.emplace_back(std::make_unique<luci::TypeInferencePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+}
+
} // namespace luci
diff --git a/compiler/luci/pass/src/CircleOptimizerUtils.cpp b/compiler/luci/pass/src/CircleOptimizerUtils.cpp
new file mode 100644
index 000000000..ffc372392
--- /dev/null
+++ b/compiler/luci/pass/src/CircleOptimizerUtils.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOptimizerUtils.h"
+
+namespace luci
+{
+
+bool in_array(const std::string &str, const std::vector<std::string> &array)
+{
+ return std::find(array.begin(), array.end(), str) != array.end();
+}
+
+std::string to_string(const std::vector<std::string> &strings)
+{
+ assert(!strings.empty());
+
+ std::string res;
+ for (unsigned int i = 0; i < strings.size() - 1; i++)
+ res += strings[i] + ", ";
+
+ res += strings[strings.size() - 1];
+ return res;
+}
+
+std::string to_lower_case(std::string s)
+{
+ std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+ return s;
+}
+
+loco::DataType str_to_dtype(const std::string &str)
+{
+ if (to_lower_case(str).compare("uint8") == 0)
+ return loco::DataType::U8;
+ if (to_lower_case(str).compare("uint16") == 0)
+ return loco::DataType::U16;
+ if (to_lower_case(str).compare("uint32") == 0)
+ return loco::DataType::U32;
+ if (to_lower_case(str).compare("uint64") == 0)
+ return loco::DataType::U64;
+
+ if (to_lower_case(str).compare("int8") == 0)
+ return loco::DataType::S8;
+ if (to_lower_case(str).compare("int16") == 0)
+ return loco::DataType::S16;
+ if (to_lower_case(str).compare("int32") == 0)
+ return loco::DataType::S32;
+ if (to_lower_case(str).compare("int64") == 0)
+ return loco::DataType::S64;
+
+ if (to_lower_case(str).compare("float16") == 0)
+ return loco::DataType::FLOAT16;
+ if (to_lower_case(str).compare("float32") == 0)
+ return loco::DataType::FLOAT32;
+ if (to_lower_case(str).compare("float64") == 0)
+ return loco::DataType::FLOAT64;
+
+ if (to_lower_case(str).compare("bool") == 0)
+ return loco::DataType::BOOL;
+
+ return loco::DataType::Unknown;
+}
+
+QuantizationGranularity str_to_granularity(const std::string &str)
+{
+ if (to_lower_case(str).compare("layer") == 0)
+ return QuantizationGranularity::LayerWise;
+
+ if (to_lower_case(str).compare("channel") == 0)
+ return QuantizationGranularity::ChannelWise;
+
+ throw std::runtime_error("Quantization granularity must be either 'layer' or 'channel'");
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/CircleOptimizerUtils.h b/compiler/luci/pass/src/CircleOptimizerUtils.h
new file mode 100644
index 000000000..7e577a05f
--- /dev/null
+++ b/compiler/luci/pass/src/CircleOptimizerUtils.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
+#define __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
+
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+
+#include <loco.h>
+
+#include <algorithm>
+
+namespace luci
+{
+
+bool in_array(const std::string &, const std::vector<std::string> &);
+
+std::string to_string(const std::vector<std::string> &);
+
+std::string to_lower_case(std::string);
+
+loco::DataType str_to_dtype(const std::string &);
+
+QuantizationGranularity str_to_granularity(const std::string &);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_OPTIMIZER_UTILS_H__
diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp
new file mode 100644
index 000000000..b81db8827
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBCQPass.cpp
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBCQPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <cassert>
+#include <string>
+#include <set>
+
+namespace
+{
+
+/**
+ * @brief Circle nodes holding BCQ information and the circle node to which BCQ will be applied
+ * are associated through their names, which share a common prefix.
+ * However, after a pb file is converted to a tflite file, some nodes' names are changed.
+ * Thus this function will return original common prefix.
+ *
+ * @note Not all re-naming rules of the TFLite converter have been figured out.
+ * Therefore, if a new naming rule is detected, this function should be updated.
+ */
+const std::string node_name_prefix(luci::NodeName node_name)
+{
+ std::string prefix = node_name;
+
+ if (prefix.find("ReadVariableOp/resource/") != std::string::npos)
+ {
+ const auto start_index = prefix.find("ReadVariableOp/resource/");
+
+ const auto left_prefix = prefix.substr(0, start_index);
+ const auto right_prefix = prefix.substr(start_index + 24);
+
+ prefix = left_prefix + right_prefix;
+ }
+
+ if (prefix.find("Tensordot/") != std::string::npos)
+ {
+ const auto index = prefix.find("Tensordot/");
+ prefix = prefix.substr(0, index - 1);
+ }
+ else if (prefix.find("kernel/") != std::string::npos)
+ {
+ const auto index = prefix.find("kernel/");
+ prefix = prefix.substr(0, index - 1);
+ }
+ else if (prefix.find("/bcqinfo_") != std::string::npos)
+ {
+ const auto index = prefix.find("/bcqinfo_");
+ prefix = prefix.substr(0, index);
+ }
+
+ return prefix;
+}
+
+} // namespace
+
+namespace
+{
+
+class BCQConverter final
+{
+public:
+ void add_BCQ_info_node(luci::CircleConst *node)
+ {
+ const auto node_name = node->name();
+ const auto prefix = node_name_prefix(node_name);
+
+ // If bcqinfo_* nodes are held by a Reshape operation,
+ // the shape of the bcqinfo_* nodes is copied to the `shape` input of the Reshape operation.
+ // The copied node's name then becomes bcqinfo_*_copy_shape.
+ // We should prevent such a node from being added to the BCQ information.
+ if (node_name.find("_copy_shape") != std::string::npos)
+ return;
+
+ if (node_name.find("bcqinfo_do_w_x") != std::string::npos)
+ _do_w_x[prefix] = node;
+ else if (node_name.find("bcqinfo_alpha") != std::string::npos)
+ _alpha[prefix] = node;
+ else if (node_name.find("bcqinfo_packed_binary_code") != std::string::npos)
+ _packed_binary_code[prefix] = node;
+ else if (node_name.find("bcqinfo_number_of_clusters") != std::string::npos)
+ _number_of_clusters[prefix] = node;
+ else if (node_name.find("bcqinfo_size_of_clusters") != std::string::npos)
+ _size_of_clusters[prefix] = node;
+ else if (node_name.find("bcqinfo_qbits_of_clusters") != std::string::npos)
+ _qbits_of_clusters[prefix] = node;
+ else if (node_name.find("bcqinfo_dequant_weight") != std::string::npos)
+ _dequant_weight[prefix] = node;
+ }
+
+ bool has_BCQ_info(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+ bool has_info = true;
+
+ has_info &= (_do_w_x.find(prefix) != _do_w_x.end());
+ has_info &= (_alpha.find(prefix) != _alpha.end());
+ has_info &= (_packed_binary_code.find(prefix) != _packed_binary_code.end());
+ has_info &= (_number_of_clusters.find(prefix) != _number_of_clusters.end());
+ has_info &= (_size_of_clusters.find(prefix) != _size_of_clusters.end());
+ has_info &= (_qbits_of_clusters.find(prefix) != _qbits_of_clusters.end());
+ // bcqinfo_dequant_weight is just for validation, so it does not always exist.
+
+ return has_info;
+ }
+
+ bool do_w_x(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+
+ if (_do_w_x[prefix]->dtype() == loco::DataType::S32)
+ return _do_w_x[prefix]->at<loco::DataType::S32>(0) == 1;
+ else if (_do_w_x[prefix]->dtype() == loco::DataType::BOOL)
+ return _do_w_x[prefix]->at<loco::DataType::BOOL>(0);
+ else
+ throw std::runtime_error("do_w_x should be int or bool");
+ }
+
+ luci::CircleConst *get_alpha(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+ return _alpha[prefix];
+ }
+
+ luci::CircleConst *get_packed_binary_code(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+ return _packed_binary_code[prefix];
+ }
+
+ luci::CircleConst *get_number_of_clusters(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+ return _number_of_clusters[prefix];
+ }
+
+ luci::CircleConst *get_size_of_clusters(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+ return _size_of_clusters[prefix];
+ }
+
+ luci::CircleConst *get_qbits_of_clusters(luci::CircleConst *node)
+ {
+ const auto prefix = node_name_prefix(node->name());
+ return _qbits_of_clusters[prefix];
+ }
+
+ luci::CircleConst *packed_clusters(luci::CircleConst *node)
+ {
+ auto graph = node->graph();
+ auto qbits_of_clusters = get_qbits_of_clusters(node);
+ auto size_of_clusters = get_size_of_clusters(node);
+ const auto number_of_clusters = get_number_of_clusters(node)->at<loco::DataType::S32>(0);
+
+ auto packed_clusters = graph->nodes()->create<luci::CircleConst>();
+ packed_clusters->dtype(loco::DataType::S32);
+ packed_clusters->size<loco::DataType::S32>(number_of_clusters * 2);
+ packed_clusters->rank(2);
+ packed_clusters->dim(0) = number_of_clusters;
+ packed_clusters->dim(1) = 2;
+ packed_clusters->shape_status(luci::ShapeStatus::VALID);
+
+ for (int i = 0; i < number_of_clusters; ++i)
+ {
+ packed_clusters->at<loco::DataType::S32>(i * 2) =
+ qbits_of_clusters->at<loco::DataType::S32>(i);
+ packed_clusters->at<loco::DataType::S32>(i * 2 + 1) =
+ size_of_clusters->at<loco::DataType::S32>(i);
+ }
+
+ return packed_clusters;
+ }
+
+ /**
+ * @brief Exclude BCQ information nodes which are used for fusing BCQ operations
+ * from graph output by using CircleOutputExclude
+ */
+ void clear_BCQ_nodes()
+ {
+ auto createNoOp = [](luci::CircleNode *circle_node) {
+ auto graph = circle_node->graph();
+ auto noOp = graph->nodes()->create<luci::CircleOutputExclude>();
+
+ if (circle_node->shape_status() == luci::ShapeStatus::VALID)
+ {
+ noOp->dtype(circle_node->dtype());
+ noOp->rank(circle_node->rank());
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ noOp->dim(i) = circle_node->dim(i);
+ }
+ else
+ {
+ // For type inference
+ noOp->dtype(loco::DataType::FLOAT32);
+ }
+
+ return noOp;
+ };
+
+ auto clear_nodes = [createNoOp](std::map<std::string, luci::CircleConst *> &nodes) {
+ for (auto &n : nodes)
+ {
+ auto node = n.second;
+
+ for (auto s : loco::succs(node))
+ {
+ if (auto outnode = dynamic_cast<luci::CircleOutput *>(s))
+ {
+ outnode->from(createNoOp(node));
+ }
+ else if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(s))
+ {
+ for (auto o : loco::succs(reshape_node))
+ {
+ auto circle_output = loco::must_cast<luci::CircleOutput *>(o);
+ circle_output->from(createNoOp(reshape_node));
+ }
+ }
+ }
+ }
+ };
+
+ clear_nodes(_do_w_x);
+ clear_nodes(_alpha);
+ clear_nodes(_packed_binary_code);
+ clear_nodes(_number_of_clusters);
+ clear_nodes(_size_of_clusters);
+ clear_nodes(_qbits_of_clusters);
+ clear_nodes(_dequant_weight);
+ }
+
+private:
+ std::map<std::string, luci::CircleConst *> _do_w_x;
+ std::map<std::string, luci::CircleConst *> _alpha;
+ std::map<std::string, luci::CircleConst *> _packed_binary_code;
+ std::map<std::string, luci::CircleConst *> _number_of_clusters;
+ std::map<std::string, luci::CircleConst *> _size_of_clusters;
+ std::map<std::string, luci::CircleConst *> _qbits_of_clusters;
+ std::map<std::string, luci::CircleConst *> _dequant_weight;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBCQPass::run(loco::Graph *g)
+{
+ BCQConverter converter;
+
+ bool changed = false;
+
+ for (auto node : loco::all_nodes(g))
+ {
+ if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
+ {
+ converter.add_BCQ_info_node(circle_const);
+ }
+ }
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto gather = dynamic_cast<luci::CircleGather *>(node))
+ {
+ auto params = dynamic_cast<luci::CircleConst *>(gather->params());
+ if (params != nullptr && converter.has_BCQ_info(params))
+ {
+ auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
+
+ bcq_gather->input_scales(converter.get_alpha(params));
+ bcq_gather->input_binary(converter.get_packed_binary_code(params));
+ bcq_gather->indices(gather->indices());
+ bcq_gather->input_clusters(converter.packed_clusters(params));
+
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleConst *>(bcq_gather->input_binary())->dim(1).value() * 32;
+ bcq_gather->input_hidden_size(binary_hidden_size);
+
+ if (converter.do_w_x(params))
+ {
+ bcq_gather->axis(gather->axis());
+ }
+ else
+ {
+ const auto axis_transpose = (gather->axis() == 0) ? 1 : 0;
+ bcq_gather->axis(axis_transpose);
+ }
+
+ loco::replace(gather).with(bcq_gather);
+
+ changed = true;
+ }
+ }
+ else if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
+ {
+ auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights());
+ if (weights != nullptr && converter.has_BCQ_info(weights))
+ {
+ auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+
+ bcq_fc->weights_scales(converter.get_alpha(weights));
+ bcq_fc->weights_binary(converter.get_packed_binary_code(weights));
+ bcq_fc->bias(fully_connected->bias());
+ bcq_fc->weights_clusters(converter.packed_clusters(weights));
+ bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+
+ loco::Node *bcq_input = fully_connected->input();
+ int32_t batch_rank = 0;
+
+ // If input of BCQFullyConnected has more than rank 2, we should reshape it as rank 2
+ const auto original_input = loco::must_cast<luci::CircleNode *>(fully_connected->input());
+ if (original_input->shape_status() == ShapeStatus::VALID && original_input->rank() > 2)
+ {
+ auto new_shape = g->nodes()->create<luci::CircleConst>();
+ new_shape->dtype(loco::DataType::S32);
+ new_shape->size<loco::DataType::S32>(2);
+ new_shape->rank(1);
+ new_shape->dim(0) = 2;
+
+ auto batch_size = 1;
+ for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
+ batch_size *= original_input->dim(i).value();
+
+ new_shape->at<loco::DataType::S32>(0) = batch_size;
+ new_shape->at<loco::DataType::S32>(1) =
+ original_input->dim(original_input->rank() - 1).value();
+ new_shape->shape_status(ShapeStatus::VALID);
+
+ auto reshape = g->nodes()->create<luci::CircleReshape>();
+ reshape->tensor(original_input);
+ reshape->shape(new_shape);
+
+ bcq_input = reshape;
+ batch_rank = original_input->rank() - 2;
+ }
+
+ // If x_w formation, we should insert Transpose in front and back of BCQFullyConnected
+ if (converter.do_w_x(weights))
+ {
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleNode *>(fully_connected->input())
+ ->dim(batch_rank)
+ .value();
+ bcq_fc->weights_hidden_size(binary_hidden_size);
+ bcq_fc->input(bcq_input);
+ loco::replace(fully_connected).with(bcq_fc);
+ }
+ else
+ {
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleNode *>(fully_connected->input())
+ ->dim(1 + batch_rank)
+ .value();
+ bcq_fc->weights_hidden_size(binary_hidden_size);
+
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(ShapeStatus::VALID);
+
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ input_transpose->a(bcq_input);
+ input_transpose->perm(perm);
+
+ bcq_fc->input(input_transpose);
+
+ auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ output_transpose->a(bcq_fc);
+ output_transpose->perm(perm);
+
+ loco::replace(fully_connected).with(output_transpose);
+ }
+
+ changed = true;
+ }
+ }
+ }
+
+ if (changed)
+ converter.clear_BCQ_nodes();
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
index 180b5bbef..ad8765c41 100644
--- a/compiler/luci/pass/src/FuseInstanceNormPass.cpp
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
@@ -15,6 +15,7 @@
*/
#include "luci/Pass/FuseInstanceNormPass.h"
+#include "FuseInstanceNormPassInternal.h"
#include <luci/IR/CircleNodes.h>
@@ -114,8 +115,6 @@ bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NOD
} // namespace
// Helper to check detail
-namespace
-{
/// @return true When node has shape of '1 x .. x 1 x depth'
bool is_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth)
@@ -130,7 +129,23 @@ bool is_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth)
return node->dim(axis).value() == depth;
}
-bool is_instance_mean(luci::CircleMean *mean)
+/// @return true if node shape consists of ones, except the one before the last dim: 1,...1,depth,1
+bool is_quasi_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth)
+{
+ auto rank = node->rank();
+ // minimal accepted shape is [1 x depth x 1]
+ if (rank < 3)
+ return false;
+ const auto depth_axis = rank - 2;
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ if (axis != depth_axis && node->dim(axis).value() != 1)
+ return false;
+ }
+ return node->dim(depth_axis).value() == depth;
+}
+
+bool is_instance_mean_v0(luci::CircleMean *mean)
{
//
// CHECK 1) input is rank 4
@@ -175,7 +190,53 @@ bool is_instance_mean(luci::CircleMean *mean)
return mean->keep_dims();
}
-} // namespace
+bool is_instance_mean_v1(luci::CircleMean *mean)
+{
+ //
+ // CHECK 1) input is rank 5 (NHWCX)
+ //
+ auto input = mean->input();
+ if (not loco::shape_known(input))
+ return false;
+ auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+ if (input_shape.rank() != 5)
+ return false;
+
+ //
+ // CHECK 2) 'reduction indices' is CircleConst of value [1,2,4], that is HWX of NHWCX input shape
+ //
+ // TODO Support equivalent case, like [-3,-2]
+ // TODO Support non-Const case?
+ // TODO What if input is NCHW format in Circle?
+ auto red_indices = dynamic_cast<luci::CircleConst *>(mean->reduction_indices());
+ if (not red_indices)
+ return false;
+ if (red_indices->rank() != 1)
+ return false;
+ std::set<int32_t> red_indices_set;
+
+ // TODO Currently only support S32, support other types
+ if (red_indices->dtype() != loco::DataType::S32)
+ return false;
+ for (uint32_t i = 0; i < red_indices->dim(0).value(); ++i)
+ red_indices_set.insert(red_indices->at<loco::DataType::S32>(i));
+
+ if (red_indices_set.size() != 3)
+ return false;
+ if (red_indices_set.find(1) == red_indices_set.end())
+ return false;
+ if (red_indices_set.find(2) == red_indices_set.end())
+ return false;
+ if (red_indices_set.find(4) == red_indices_set.end())
+ return false;
+
+ //
+ // CHECK 3) keep_dims == true (?)
+ //
+ // We only have case of 'keep_dims == true' so far, but it might be okay with 'keep_dims == false'
+ // TODO Check this fact, and if true, return true regardless of keep_dims
+ return mean->keep_dims();
+}
// Helper to fuse Instance Norm
namespace
@@ -227,14 +288,61 @@ namespace
* |
* V
* [Out]
+ *-------------------------------------------------------------------
+ * [In]
+ * |
+ * V
+ * ifm
+ * |
+ * V
+ +---------reshape_of_ifm ----+ (reduction indices)
+ * | | | |
+ * | | V V
+ * | | mean_of_reshape -------------+
+ * | V | |
+ | sqdiff <--+ (reduction indices)  |
+ * | | | |
+ * | V | |
+ * | mean_as_variance <---+ const_as_epsilon |
+ * | | | |
+ * | V | |
+ * | add_as_variance <--------+ |
+ * | | |
+ * | V |
+ * | rsqrt const_as_gamma |
+ * | | | |
+ * | V | |
+ * | mul_gamma <--+ |
+ * | | | |
+ * V V V |
+ * mul_as_scaled_reshape mul_as_scaled_mean <-----------+
+ * | |
+ * | const_as_beta |
+ * | | V
+ * | +------> sub
+ * V |
+ * add_as_terminal <----------+
+ * |
+ * V
+ * reshape_as_terminal
+ * |
+ * V
+ * [Out]
*/
class InstanceNormPattern final
{
public:
- InstanceNormPattern(luci::CircleAdd *candidate)
+ enum PatternVersion
+ {
+ Version_0,
+ Version_1
+ };
+
+ InstanceNormPattern(luci::CircleAdd *candidate, PatternVersion pv)
{
assert(candidate);
add_as_terminal = candidate;
+ _pv = pv;
}
public:
@@ -244,7 +352,9 @@ public:
public:
// Context
loco::Node *ifm = nullptr;
+ luci::CircleReshape *reshape_of_ifm = nullptr;
luci::CircleMean *mean_of_ifm = nullptr;
+ luci::CircleMean *mean_of_reshape = nullptr;
luci::CircleSquaredDifference *sqdiff = nullptr;
luci::CircleMean *mean_as_variance = nullptr;
luci::CircleConst *const_as_epsilon = nullptr;
@@ -254,12 +364,14 @@ public:
luci::CircleMul *mul_gamma = nullptr;
luci::CircleMul *mul_as_scaled_ifm = nullptr;
luci::CircleMul *mul_as_scaled_mean = nullptr;
+ luci::CircleMul *mul_as_scaled_reshape = nullptr;
luci::CircleConst *const_as_beta = nullptr;
luci::CircleSub *sub = nullptr;
luci::CircleAdd *add_as_terminal = nullptr;
private:
bool _matched = false;
+ PatternVersion _pv;
};
bool InstanceNormPattern::matched()
@@ -273,8 +385,18 @@ bool InstanceNormPattern::matched()
// Check order is DFS
- CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+ if (_pv == PatternVersion::Version_0)
+ {
+ CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+ }
+ if (_pv == PatternVersion::Version_1)
+ {
+ CHECK_OR_FALSE(fill(&mul_as_scaled_reshape, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(
+ fill(&reshape_of_ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_reshape));
+ ifm = reshape_of_ifm->tensor();
+ }
CHECK_OR_FALSE(loco::shape_known(ifm));
auto ifm_shape = loco::shape_get(ifm);
@@ -284,7 +406,15 @@ bool InstanceNormPattern::matched()
uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+
+ if (_pv == PatternVersion::Version_0)
+ {
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+ }
+ if (_pv == PatternVersion::Version_1)
+ {
+ CHECK_OR_FALSE(is_quasi_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+ }
add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
CHECK_OR_FALSE(add_as_variance);
@@ -296,29 +426,69 @@ bool InstanceNormPattern::matched()
// TODO Support regarding broadcast
CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
- CHECK_OR_FALSE(is_instance_mean(mean_as_variance));
+ if (_pv == PatternVersion::Version_0)
+ {
+ CHECK_OR_FALSE(is_instance_mean_v0(mean_as_variance));
+ }
+ if (_pv == PatternVersion::Version_1)
+ {
+ CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance));
+ }
+
sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input());
CHECK_OR_FALSE(sqdiff);
- loco::Node *ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(is_instance_mean(mean_of_ifm));
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+ if (_pv == PatternVersion::Version_0)
+ {
+ loco::Node *ifm_should_be = nullptr;
+ CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(is_instance_mean_v0(mean_of_ifm));
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+ }
+ if (_pv == PatternVersion::Version_1)
+ {
+ loco::Node *reshape_should_be = nullptr;
+ CHECK_OR_FALSE(fill(&reshape_should_be, &mean_of_reshape).with_commutative_args_of(sqdiff));
+ CHECK_OR_FALSE(reshape_of_ifm == reshape_should_be);
+ CHECK_OR_FALSE(is_instance_mean_v1(mean_of_reshape));
+ CHECK_OR_FALSE(reshape_of_ifm == mean_of_reshape->input());
+ }
const_as_beta = dynamic_cast<luci::CircleConst *>(sub->x());
CHECK_OR_FALSE(const_as_beta);
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+
+ if (_pv == PatternVersion::Version_0)
+ {
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+ }
+ if (_pv == PatternVersion::Version_1)
+ {
+ CHECK_OR_FALSE(is_quasi_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+ }
mul_as_scaled_mean = dynamic_cast<luci::CircleMul *>(sub->y());
CHECK_OR_FALSE(mul_as_scaled_mean);
luci::CircleMul *mul_gamma_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
- CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
- .with_commutative_args_of(mul_as_scaled_mean));
- CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
- CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
+ luci::CircleMean *mean_of_reshape_should_be = nullptr;
+
+ if (_pv == PatternVersion::Version_0)
+ {
+ CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
+ CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
+ }
+ if (_pv == PatternVersion::Version_1)
+ {
+ CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_reshape_should_be)
+ .with_commutative_args_of(mul_as_scaled_mean));
+ CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
+ CHECK_OR_FALSE(mean_of_reshape == mean_of_reshape_should_be);
+ }
+
#undef CHECK_OR_FALSE
_matched = true;
return true;
@@ -381,13 +551,28 @@ namespace luci
bool FuseInstanceNormPass::run(loco::Graph *g)
{
bool changed = false;
+ luci::CircleAdd *add;
+ InstanceNormPattern::PatternVersion pv;
+
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- auto add = dynamic_cast<luci::CircleAdd *>(node);
- if (not add)
- continue;
+ auto reshape = dynamic_cast<luci::CircleReshape *>(node);
+ if (not reshape)
+ {
+ add = dynamic_cast<luci::CircleAdd *>(node);
+ if (not add)
+ continue;
+ pv = InstanceNormPattern::PatternVersion::Version_0;
+ }
+ else
+ {
+ add = dynamic_cast<luci::CircleAdd *>(reshape->tensor());
+ if (not add)
+ continue;
+ pv = InstanceNormPattern::PatternVersion::Version_1;
+ }
- InstanceNormPattern pattern(add);
+ InstanceNormPattern pattern(add, pv);
if (not pattern.matched())
continue;
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp
new file mode 100644
index 000000000..3037f3def
--- /dev/null
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FuseInstanceNormPassInternal.h"
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+void setShape(luci::CircleNode &node, const std::vector<int> &v)
+{
+ node.rank(v.size());
+ for (int i = 0; i < v.size(); ++i)
+ {
+ node.dim(i) = v[i];
+ }
+}
+
+} // namespace
+
+TEST(FuseInstanceNormPass, is_quasi_1D_with_dummy_dim)
+{
+ luci::CircleConst const_node;
+
+ setShape(const_node, {});
+ EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {1});
+ EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {8});
+ EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {1, 2, 1, 8, 1});
+ EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {8, 3});
+ EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {8, 1});
+ EXPECT_FALSE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {1, 8, 1});
+ EXPECT_TRUE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+
+ setShape(const_node, {1, 1, 1, 8, 1});
+ EXPECT_TRUE(is_quasi_1D_with_dummy_dim(&const_node, 8));
+}
diff --git a/compiler/luci/pass/src/FuseInstanceNormPassInternal.h b/compiler/luci/pass/src/FuseInstanceNormPassInternal.h
new file mode 100644
index 000000000..32b638ba5
--- /dev/null
+++ b/compiler/luci/pass/src/FuseInstanceNormPassInternal.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_FUSE_INSTANCE_NORM_PASS_INTERNAL_H__
+#define __LUCI_CIRCLE_FUSE_INSTANCE_NORM_PASS_INTERNAL_H__
+
+#include <luci/IR/CircleNodes.h>
+
+/// @return true When node has shape of '1 x .. x 1 x depth'
+bool is_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth);
+
+/// @return true When node has shape of '1 x .. x depth x 1'
+bool is_quasi_1D_with_dummy_dim(luci::CircleConst *node, uint32_t depth);
+
+#endif // __LUCI_CIRCLE_FUSE_INSTANCE_NORM_PASS_INTERNAL_H__
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
new file mode 100644
index 000000000..6726ce746
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizationUtils.h"
+
+#include <luci/Log.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
+ float &nudged_min, float &nudged_max)
+{
+ assert(min != max);
+
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+ const double qmin_double = kMinScale;
+ const double qmax_double = kMaxScale;
+ const double rmin = std::fmin(0, min);
+ const double rmax = std::fmax(0, max);
+ double scale_factor_from_min_side{0};
+ double scale_factor_from_max_side{0};
+
+ if ((qmin_double * rmin) > 0)
+ scale_factor_from_min_side = rmin / qmin_double;
+
+ if ((qmax_double * rmax) > 0)
+ scale_factor_from_max_side = rmax / qmax_double;
+
+ scaling_factor = scale_factor_from_min_side > scale_factor_from_max_side
+ ? scale_factor_from_min_side
+ : scale_factor_from_max_side;
+ zp = 0;
+ nudged_min = static_cast<float>(qmin_double * scaling_factor);
+ nudged_max = static_cast<float>(qmax_double * scaling_factor);
+}
+
+void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
+ float &nudged_min, float &nudged_max)
+{
+ LOGGER(l);
+
+ assert(min <= max);
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+ const double qmin_double = kMinScale;
+ const double qmax_double = kMaxScale;
+ const double rmin = std::fmin(0, min);
+ const double rmax = std::fmax(0, max);
+
+ double scale = (rmax - rmin) / (qmax_double - qmin_double);
+ double zero_point_double = 0;
+ uint8_t nudged_zero_point = 0;
+ if (scale == 0)
+ {
+ WARN(l) << "The minimum and maximum values are the same." << std::endl;
+ if (min >= 0 && max >= 0)
+ zero_point_double = kMinScale;
+ else
+ zero_point_double = kMaxScale;
+ }
+ else
+ zero_point_double = qmin_double - rmin / scale;
+ if (zero_point_double <= qmin_double)
+ {
+ assert(min >= 0 && max >= 0);
+ nudged_zero_point = kMinScale;
+ scale = max / (qmax_double - qmin_double);
+ if (min > 0 && max > 0)
+ WARN(l) << "The minimum and maximum values are all positive." << std::endl;
+ }
+ else if (zero_point_double >= qmax_double)
+ {
+ assert(min < 0 && max < 0);
+ nudged_zero_point = kMaxScale;
+ scale = -min / (qmax_double - qmin_double);
+ WARN(l) << "The minimum and maximum values are all negative." << std::endl;
+ }
+ else
+ {
+ assert(min < 0 && max >= 0);
+ nudged_zero_point = static_cast<uint8_t>(std::round(zero_point_double));
+ }
+
+ nudged_min = static_cast<float>((qmin_double - nudged_zero_point) * scale);
+ nudged_max = static_cast<float>((qmax_double - nudged_zero_point) * scale);
+
+ scaling_factor = scale;
+ zp = nudged_zero_point;
+}
+
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index)
+{
+ auto succs = loco::succs(node);
+ if (succs.size() != 1) // assume weights is used by only one node
+ return false;
+
+ for (auto out : succs)
+ {
+ auto conv = dynamic_cast<CircleConv2D *>(out);
+ auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+ auto tw_conv = dynamic_cast<CircleTransposeConv *>(out);
+ auto fc = dynamic_cast<CircleFullyConnected *>(out);
+
+ // Refer to https://github.com/Samsung/ONE/pull/2448.
+ if ((conv != nullptr && conv->filter() == node) ||
+ (tw_conv != nullptr && tw_conv->filter() == node)) // OHWI
+ {
+ assert(node->rank() == 4);
+ dimension.dim(0).set(node->dim(0).value());
+ dimension.dim(1).set(node->dim(1).value());
+ dimension.dim(2).set(node->dim(2).value());
+ dimension.dim(3).set(node->dim(3).value());
+ channel_dim_index = 0; // Set channel_dim_index based on "O"
+ return true;
+ }
+ else if (dw_conv != nullptr && dw_conv->filter() == node) // IHWC
+ {
+ assert(node->rank() == 4);
+ dimension.dim(0).set(node->dim(0).value());
+ dimension.dim(1).set(node->dim(1).value());
+ dimension.dim(2).set(node->dim(2).value());
+ dimension.dim(3).set(node->dim(3).value());
+ channel_dim_index = 3; // Set channel_dim_index based on "C"
+ return true;
+ }
+ else if (fc != nullptr && fc->weights() == node) // OI
+ {
+ assert(node->rank() == 2);
+ dimension.dim(0).set(node->dim(0).value());
+ dimension.dim(1).set(1); // Set FC layer like CONV
+ dimension.dim(2).set(1);
+ dimension.dim(3).set(node->dim(1).value());
+ channel_dim_index = 0; // Set channel_dim_index based on "O"
+ return true;
+ }
+ else
+ {
+      // node does not support channel-wise quantization
+ assert(false);
+ }
+ }
+
+ return false;
+}
+
+uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
+{
+ return indices[0] * dimension.dim(1).value() * dimension.dim(2).value() *
+ dimension.dim(3).value() +
+ indices[1] * dimension.dim(2).value() * dimension.dim(3).value() +
+ indices[2] * dimension.dim(3).value() + indices[3];
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h
new file mode 100644
index 000000000..ec0e86df8
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizationUtils.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZATION_UTILS_H__
+#define __LUCI_QUANTIZATION_UTILS_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <loco/IR/TensorShape.h>
+
+namespace luci
+{
+
+void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
+ float &nudged_min, float &nudged_max);
+
+void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
+ float &nudged_min, float &nudged_max);
+
+bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension, int &channel_dim_index);
+
+uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZATION_UTILS_H__
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
new file mode 100644
index 000000000..c492234c7
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
@@ -0,0 +1,495 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+#include <loco/IR/TensorShape.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+namespace
+{
+
+void cal_minmax_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max)
+{
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+ int size{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+ size = dimension.dim(channel_dim_index).value();
+
+ std::vector<bool> has_min_max_value(size, false);
+ min.resize(size);
+ max.resize(size);
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ if (has_min_max_value[channel_idx])
+ {
+ min[channel_idx] = data < min[channel_idx] ? data : min[channel_idx];
+ max[channel_idx] = data > max[channel_idx] ? data : max[channel_idx];
+ }
+ else
+ {
+ min[channel_idx] = data;
+ max[channel_idx] = data;
+ has_min_max_value[channel_idx] = true;
+ }
+ }
+ }
+ }
+ }
+}
+
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &min, std::vector<float> &max,
+ std::vector<float> &scaling_factor, std::vector<int64_t> &zp,
+ std::vector<float> &nudged_min, std::vector<float> &nudged_max)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (size_t i = 0; i < min.size(); ++i)
+ {
+ compute_sym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
+ }
+
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ }
+ }
+ }
+ }
+
+ node->dtype(loco::DataType::S16); // change the type of tensor
+ node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S16>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void sym_wdequant_per_channel(CircleConst *node, std::vector<float> &scaling_factor)
+{
+ assert(node->dtype() == loco::DataType::S16);
+ uint32_t size = node->size<loco::DataType::S16>();
+ std::vector<float> dequantized_values(size);
+
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::S16>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx];
+ }
+ }
+ }
+ }
+
+ node->dtype(loco::DataType::FLOAT32); // change the type of tensor
+ node->size<loco::DataType::FLOAT32>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::FLOAT32>(i) = dequantized_values[i];
+ }
+}
+
+void asymmetric_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+ std::vector<float> &max, std::vector<float> &scaling_factor,
+ std::vector<int64_t> &zp, std::vector<float> &nudged_min,
+ std::vector<float> &nudged_max)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (size_t i = 0; i < min.size(); ++i)
+ {
+ compute_asym_scale_zp(min[i], max[i], scaling_factor[i], zp[i], nudged_min[i], nudged_max[i]);
+ }
+
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ data = data < nudged_min[channel_idx] ? nudged_min[channel_idx] : data;
+ data = data > nudged_max[channel_idx] ? nudged_max[channel_idx] : data;
+ quantized_values[cal_offset(dimension, indices)] = static_cast<int32_t>(
+ std::round((data - nudged_min[channel_idx]) * scaling_factor_inv));
+ }
+ }
+ }
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void asymmetric_wdequant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
+ std::vector<float> &nudged_min)
+{
+ assert(node->dtype() == loco::DataType::U8);
+ uint32_t size = node->size<loco::DataType::U8>();
+ std::vector<float> dequantized_values(size);
+
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ auto data = node->at<loco::DataType::U8>(cal_offset(dimension, indices));
+ dequantized_values[cal_offset(dimension, indices)] =
+ static_cast<float>(data) * scaling_factor[channel_idx] + nudged_min[channel_idx];
+ }
+ }
+ }
+ }
+
+ node->dtype(loco::DataType::FLOAT32); // change the type of tensor
+ node->size<loco::DataType::FLOAT32>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::FLOAT32>(i) = dequantized_values[i];
+ }
+}
+
+void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
+ float &scaling_factor, int64_t &zp, float &nudged_min,
+ float &nudged_max)
+{
+
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ // clipping
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ data = data < nudged_min ? nudged_min : data;
+ data = data > nudged_max ? nudged_max : data;
+ quantized_values[i] =
+ static_cast<int32_t>(std::round((data - nudged_min) * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+void asymmetric_wdequant_with_minmax_per_layer(CircleConst *node, float scaling_factor,
+ float nudged_min)
+{
+ uint32_t size = node->size<loco::DataType::U8>();
+ std::vector<float> dequantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::U8>(i);
+ dequantized_values[i] = static_cast<float>(data) * scaling_factor + nudged_min;
+ }
+
+ node->dtype(loco::DataType::FLOAT32); // change the type of tensor
+ node->size<loco::DataType::FLOAT32>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::FLOAT32>(i) = dequantized_values[i];
+ }
+}
+
+bool is_quantized(const CircleNode *node)
+{
+ return node->dtype() == loco::DataType::U8 || // activation, weight
+ node->dtype() == loco::DataType::S16 || // activation, weight
+ node->dtype() == loco::DataType::S32; // bias
+}
+
+// Check if node is weights of conv2d, transpose_conv2d, depthwise_conv2d, or fully_connected layer
+bool is_weights(CircleNode *node)
+{
+ auto circle_const = dynamic_cast<CircleConst *>(node);
+ if (circle_const == nullptr)
+ return false;
+
+ auto succs = loco::succs(node);
+ if (succs.size() != 1) // assume weights is used by only one node
+ return false;
+
+ for (auto out : succs)
+ {
+ auto conv = dynamic_cast<CircleConv2D *>(out);
+ if (conv != nullptr && conv->filter() == circle_const && circle_const->rank() == 4)
+ return true;
+
+ auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+ if (dw_conv != nullptr && dw_conv->filter() == circle_const && circle_const->rank() == 4)
+ return true;
+
+ auto tw_conv = dynamic_cast<CircleTransposeConv *>(out);
+ if (tw_conv != nullptr && tw_conv->filter() == circle_const && circle_const->rank() == 4)
+ return true;
+
+ auto fc = dynamic_cast<CircleFullyConnected *>(out);
+ if (fc != nullptr && fc->weights() == circle_const && circle_const->rank() == 2)
+ return true;
+ }
+ return false;
+}
+
+/**
+ * @brief QuantizeDequantizeWeights quantizes and dequantizes tensors for weights
+ * @details Find min/max values on the fly, quantize the model, and dequantize the model
+ */
+struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
+{
+ QuantizeDequantizeWeights(loco::DataType input, loco::DataType output,
+ QuantizationGranularity granularity)
+ : input_type(input), output_type(output), granularity(granularity)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+ // Quantize and dequantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ assert(output_type == loco::DataType::U8 || output_type == loco::DataType::S16);
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this is already quantized
+ if (is_quantized(circle_node))
+ continue;
+
+ if (is_weights(circle_node))
+ {
+ auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
+
+ // Find min/max per channel-wise
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ std::vector<float> min;
+ std::vector<float> max;
+
+ cal_minmax_per_channel(circle_const, min, max);
+
+ std::vector<float> nudged_min(min.size());
+ std::vector<float> nudged_max(min.size());
+ std::vector<float> scaling_factor(min.size());
+ std::vector<int64_t> zp(min.size());
+
+ if (output_type == loco::DataType::U8)
+ {
+ asymmetric_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ asymmetric_wdequant_per_channel(circle_const, scaling_factor, nudged_min);
+ }
+ else
+ {
+ sym_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
+ nudged_max);
+ sym_wdequant_per_channel(circle_const, scaling_factor);
+ }
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->min = nudged_min;
+ quantparam->max = nudged_max;
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ circle_node->quantparam(std::move(quantparam));
+ }
+ // Find min/max per layer-wise
+ else
+ {
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::lowest();
+ for (uint32_t i = 0; i < circle_const->size<loco::DataType::FLOAT32>(); i++)
+ {
+ auto data = circle_const->at<loco::DataType::FLOAT32>(i);
+ min = data < min ? data : min;
+ max = data > max ? data : max;
+ }
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ asymmetric_wquant_with_minmax_per_layer(circle_const, min, max, scaling_factor, zp,
+ nudged_min, nudged_max);
+ asymmetric_wdequant_with_minmax_per_layer(circle_const, scaling_factor, nudged_min);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->min.push_back(nudged_min);
+ quantparam->max.push_back(nudged_max);
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ circle_node->quantparam(std::move(quantparam));
+ }
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace
+
+bool QuantizeDequantizeWeightsPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeDequantizeWeightsPass Start" << std::endl;
+
+ // Quantize weights
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ QuantizeDequantizeWeights qw(_input_dtype, _output_dtype, _granularity);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&qw);
+ }
+
+ INFO(l) << "QuantizeDequantizeWeightsPass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
new file mode 100644
index 000000000..f8abee751
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <oops/UserExn.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+namespace
+{
+
+// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
+// If true, return <input, weight> pair of the successor node (used to quantize bias)
+// If false, return <nullptr, nullptr>
+std::pair<loco::Node *, loco::Node *> get_input_weight_of_bias(CircleNode *node)
+{
+ // Only constant tensors can be a bias
+ auto circle_const = dynamic_cast<CircleConst *>(node);
+ if (circle_const == nullptr)
+ return std::make_pair(nullptr, nullptr);
+
+ auto succs = loco::succs(node);
+ if (succs.size() != 1) // assume bias is used by only one node
+ return std::make_pair(nullptr, nullptr);
+
+ for (auto out : succs)
+ {
+ // The constant qualifies as a bias only when it is wired to the bias()
+ // input (not e.g. the filter input) of one of the three bias-carrying ops.
+ auto conv = dynamic_cast<CircleConv2D *>(out);
+ if (conv != nullptr && conv->bias() == circle_const)
+ {
+ assert(conv->input() != nullptr);
+ assert(conv->filter() != nullptr);
+ return std::make_pair(conv->input(), conv->filter());
+ }
+ auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+ if (dw_conv != nullptr && dw_conv->bias() == circle_const)
+ {
+ assert(dw_conv->input() != nullptr);
+ assert(dw_conv->filter() != nullptr);
+ return std::make_pair(dw_conv->input(), dw_conv->filter());
+ }
+ auto fc = dynamic_cast<CircleFullyConnected *>(out);
+ if (fc != nullptr && fc->bias() == circle_const)
+ {
+ assert(fc->input() != nullptr);
+ assert(fc->weights() != nullptr);
+ return std::make_pair(fc->input(), fc->weights());
+ }
+ }
+ return std::make_pair(nullptr, nullptr);
+}
+
+// Quantize a bias tensor per-layer into S32.
+// The bias scale is input_scale * weight_scale; the zero-point is fixed to 0
+// (the code always writes *zp = 0, matching the S32 bias convention).
+// Out-params: scaling_factor and zp receive the resulting quant parameters.
+void asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
+ float *scaling_factor, int64_t *zp)
+{
+ float scale = input_scale * weight_scale;
+ // Guard against division by zero when either scale is 0
+ const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ quantized_values[i] =
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::S32); // change the type of tensor
+ node->size<loco::DataType::S32>(size); // resize tensor
+ // Clamp into the full S32 range before writing back
+ const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+ const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+ *scaling_factor = scale;
+ *zp = 0;
+}
+
+// Quantize a bias tensor per-channel into S32.
+// Each bias element i gets scale input_scale * weight_scale[i] and zero-point 0.
+// Out-params: scaling_factor and zp are filled element-wise (callers size them
+// to the bias length beforehand).
+void quant_bias_per_channel(CircleConst *node, float input_scale, std::vector<float> &weight_scale,
+ std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+{
+ float scaling_factor_inv{0};
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ scaling_factor[i] = input_scale * weight_scale[i];
+ // Guard against division by zero for degenerate channel scales
+ scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+ quantized_values[i] =
+ static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+ zp[i] = 0;
+ }
+
+ node->dtype(loco::DataType::S32); // change the type of tensor
+ node->size<loco::DataType::S32>(size); // resize tensor
+ // Clamp into the full S32 range before writing back
+ const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+ const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S32>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// True when the node carries recorded min AND max quantization statistics.
+// The pass treats such nodes as activations (see QuantizeActivation below).
+bool has_min_max(const CircleNode *node)
+{
+ return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
+}
+
+// True when the tensor already holds quantized data, judged by dtype.
+// NOTE(review): S16 is not listed here even though QuantizeActivation and
+// sym_wquant_per_channel below produce S16 tensors — confirm whether S16
+// nodes can be revisited by these passes, or whether S16 should be added.
+bool is_quantized(const CircleNode *node)
+{
+ return node->dtype() == loco::DataType::U8 || // activation, weight
+ node->dtype() == loco::DataType::S32; // bias
+}
+
+// Symmetric per-channel weight quantization to S16.
+// scaling_factor[c] is the precomputed scale for channel c; values are
+// clamped to [-32767, 32767] (kMinScale is -kMaxScale, so -32768 is excluded).
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+ const int32_t kMinScale = -kMaxScale;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+
+ // Resolve which of the 4 dims is the channel dim; bail out (debug-assert)
+ // if the helper cannot determine it.
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ // Walk all elements of the 4-D tensor; each element is divided by the
+ // scale of the channel it belongs to.
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round(data * scaling_factor_inv));
+ }
+ }
+ }
+ }
+
+ node->dtype(loco::DataType::S16); // change the type of tensor
+ node->size<loco::DataType::S16>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::S16>(i) =
+ std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// Asymmetric per-channel weight quantization to U8 ([0, 255]).
+// For each element: q = round((value - min[channel]) / scaling_factor[channel]).
+void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+ std::vector<float> &scaling_factor)
+{
+ assert(node->dtype() == loco::DataType::FLOAT32);
+
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+ std::vector<int32_t> quantized_values(size);
+
+ loco::TensorShape dimension;
+ dimension.rank(4);
+ uint32_t indices[4] = {
+ 0,
+ };
+ int channel_dim_index{0};
+
+ // Resolve which of the 4 dims is the channel dim; bail out (debug-assert)
+ // if the helper cannot determine it.
+ if (!get_channel_dim_index(node, dimension, channel_dim_index))
+ {
+ assert(false);
+ return;
+ }
+
+ // Walk all elements; shift by the channel's min and divide by its scale.
+ for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+ {
+ for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+ {
+ for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+ {
+ for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+ {
+ int channel_idx = indices[channel_dim_index];
+ const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+ auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+ quantized_values[cal_offset(dimension, indices)] =
+ static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
+ }
+ }
+ }
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// Asymmetric per-layer weight quantization to U8 ([0, 255]) using one
+// min/scale pair for the whole tensor: q = round((value - min) / scaling_factor).
+void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
+{
+ const int32_t kMinScale = 0;
+ const int32_t kMaxScale = 255;
+
+ uint32_t size = node->size<loco::DataType::FLOAT32>();
+
+ const float scaling_factor_inv = 1.0 / scaling_factor;
+ std::vector<int32_t> quantized_values(size);
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ auto data = node->at<loco::DataType::FLOAT32>(i);
+ quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
+ }
+
+ node->dtype(loco::DataType::U8); // change the type of tensor
+ node->size<loco::DataType::U8>(size); // resize tensor
+ for (uint32_t i = 0; i < size; ++i)
+ {
+ node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+ }
+}
+
+// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
+// (i.e. a CircleConst wired to the filter()/weights() input of its single user).
+bool is_weights(CircleNode *node)
+{
+ auto circle_const = dynamic_cast<CircleConst *>(node);
+ if (circle_const == nullptr)
+ return false;
+
+ auto succs = loco::succs(node);
+ if (succs.size() != 1) // assume weights is used by only one node
+ return false;
+
+ for (auto out : succs)
+ {
+ auto conv = dynamic_cast<CircleConv2D *>(out);
+ if (conv != nullptr && conv->filter() == circle_const)
+ return true;
+
+ auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+ if (dw_conv != nullptr && dw_conv->filter() == circle_const)
+ return true;
+
+ auto fc = dynamic_cast<CircleFullyConnected *>(out);
+ if (fc != nullptr && fc->weights() == circle_const)
+ return true;
+ }
+ return false;
+}
+
+/**
+ * @brief QuantizeActivation quantizes tensors for activations
+ * @details Quantize using recorded min/max values
+ */
+struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
+{
+ QuantizeActivation(loco::DataType input, loco::DataType output)
+ : input_type(input), output_type(output)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+
+ // Quantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this is already quantized
+ if (is_quantized(circle_node))
+ continue;
+
+ // Check if this is bias (bias is quantized later)
+ auto iw = get_input_weight_of_bias(circle_node);
+ if (iw.first != nullptr && iw.second != nullptr)
+ continue;
+
+ // Check if this is activation
+ // We assume min/max are recorded only for activations
+ if (has_min_max(circle_node) && !is_weights(circle_node))
+ {
+ // Quantize using recorded min/max
+ auto quantparam = circle_node->quantparam();
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->max.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto max = quantparam->max[0];
+
+ float scaling_factor{0};
+ int64_t zp{0};
+ float nudged_min{0};
+ float nudged_max{0};
+
+ // U8 output -> asymmetric quant; otherwise symmetric quant to S16
+ if (output_type == loco::DataType::U8)
+ {
+ compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ circle_node->dtype(loco::DataType::U8);
+ }
+ else
+ {
+ compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+ circle_node->dtype(loco::DataType::S16);
+ }
+
+ // Record nudged range and the computed scale/zero-point on the node.
+ // NOTE(review): scale/zerop are push_back'ed — if the same node were
+ // visited again before its dtype changes, duplicates would accumulate;
+ // confirm is_quantized() above always prevents a second visit.
+ circle_node->quantparam()->max[0] = nudged_max;
+ circle_node->quantparam()->min[0] = nudged_min;
+ circle_node->quantparam()->scale.push_back(scaling_factor);
+ circle_node->quantparam()->zerop.push_back(zp);
+ }
+ }
+ return false;
+ }
+};
+
+/**
+ * @brief QuantizeBias quantizes bias constants into S32
+ * @details Bias scale is derived from the already-quantized input and weight
+ *          scales (per-channel or per-layer depending on granularity).
+ */
+struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
+{
+ QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+ // Quantize bias node
+ bool visit(luci::CircleNode *node)
+ {
+ // Check if this is already quantized
+ if (is_quantized(node))
+ return false;
+
+ // Check if this is bias
+ auto iw = get_input_weight_of_bias(node);
+ if (iw.first == nullptr || iw.second == nullptr)
+ return false;
+
+ auto input = loco::must_cast<luci::CircleNode *>(iw.first);
+ auto weight = loco::must_cast<luci::CircleNode *>(iw.second);
+
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ assert(input->quantparam()->scale.size() == 1); // input scale's layer-wise
+ auto input_scale = input->quantparam()->scale[0];
+
+ assert(weight->quantparam() != nullptr); // weight scale's channel-wise
+ auto weight_scale = weight->quantparam()->scale;
+
+ auto circle_const = loco::must_cast<luci::CircleConst *>(node);
+
+ // One bias element per output channel, so bias length == #weight scales
+ uint32_t size = circle_const->size<loco::DataType::FLOAT32>();
+ assert(size == weight_scale.size());
+ std::vector<float> scaling_factor(size);
+ std::vector<int64_t> zp(size);
+
+ quant_bias_per_channel(circle_const, input_scale, weight_scale, scaling_factor, zp);
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale = scaling_factor;
+ quantparam->zerop = zp;
+ assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
+ circle_const->quantparam(std::move(quantparam));
+ }
+ else
+ {
+ assert(input->quantparam()->scale.size() == 1); // Only support per-layer quant
+ auto input_scale = input->quantparam()->scale[0];
+
+ assert(weight->quantparam()->scale.size() == 1); // Only support per-layer quant
+ auto weight_scale = weight->quantparam()->scale[0];
+
+ auto circle_const = loco::must_cast<luci::CircleConst *>(node);
+ float scaling_factor{0};
+ int64_t zp{0};
+ asym_quant_bias_per_layer(circle_const, input_scale, weight_scale, &scaling_factor, &zp);
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scaling_factor);
+ quantparam->zerop.push_back(zp);
+ assert(circle_const->quantparam() == nullptr); // bias should not be quantized before
+ circle_const->quantparam(std::move(quantparam));
+ }
+ return false;
+ }
+};
+
+/**
+ * @brief QuantizeWeights quantizes tensors for weights
+ * @details Quantize using quantparam (min/scale) already recorded on the
+ *          weight node — presumably by an earlier phase; TODO confirm which
+ *          pass records them.
+ */
+struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
+{
+ QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+ : input_type(input), output_type(output), granularity(gr)
+ {
+ }
+
+ loco::DataType input_type;
+ loco::DataType output_type;
+ QuantizationGranularity granularity;
+
+ // Quantize input tensors of each node
+ bool visit(luci::CircleNode *node)
+ {
+ LOGGER(l);
+ INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
+ auto arity = node->arity();
+ for (uint32_t i = 0; i < arity; i++)
+ {
+ auto input_node = node->arg(i);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+ // Check if this is already quantized
+ if (is_quantized(circle_node))
+ continue;
+
+ if (is_weights(circle_node))
+ {
+ auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
+
+ // Channel-wise: use the recorded per-channel min/scale vectors
+ if (granularity == QuantizationGranularity::ChannelWise)
+ {
+ auto quantparam = circle_node->quantparam();
+ assert(quantparam != nullptr);
+ auto min = quantparam->min;
+ auto scaling_factor = quantparam->scale;
+
+ // U8 output -> asymmetric quant; otherwise symmetric quant to S16
+ if (output_type == loco::DataType::U8)
+ {
+ asym_wquant_per_channel(circle_const, min, scaling_factor);
+ }
+ else
+ {
+ sym_wquant_per_channel(circle_const, scaling_factor);
+ }
+ }
+ // Layer-wise: use the single recorded min/scale pair
+ else
+ {
+ // Quantize using recorded quantparam
+ auto quantparam = circle_node->quantparam();
+ assert(quantparam != nullptr);
+ assert(quantparam->min.size() == 1); // only support layer-wise quant
+ assert(quantparam->scale.size() == 1); // only support layer-wise quant
+ auto min = quantparam->min[0];
+ auto scaling_factor = quantparam->scale[0];
+ asym_wquant_per_layer(circle_const, min, scaling_factor);
+ }
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace
+
+// Entry point of the pass. Quantization runs in three sweeps over all active
+// nodes — activations first, then weights, then biases — because bias scales
+// depend on the already-computed input and weight scales. Finally the graph
+// output dtypes are synchronized with their producers.
+bool QuantizeWithMinMaxPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
+
+ // Quantize activation
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ QuantizeActivation qa(_input_dtype, _output_dtype);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&qa);
+ }
+
+ // Quantize weights
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&qw);
+ }
+
+ // Quantize bias
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
+ auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+ circle_node->accept(&qb);
+ }
+
+ // Update output dtype
+ auto graph_outputs = g->outputs();
+ for (auto node : loco::output_nodes(g))
+ {
+ auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
+ // Propagate dtype only when the feeding node was actually quantized
+ if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+ {
+ circle_node->dtype(_output_dtype);
+ auto graph_output = graph_outputs->at(circle_node->index());
+ graph_output->dtype(_output_dtype);
+ }
+ }
+
+ INFO(l) << "QuantizeWithMinMaxPass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
new file mode 100644
index 000000000..e52d667d7
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpAddPass.h"
+
+#include "flatbuffers/flexbuffers.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+namespace
+{
+
+/// @brief Returns the index of BroadcastTo node among cop's inputs.
+/// Returns -1 when no input is produced by a "BroadcastTo" custom op.
+// NOTE This function assumes there is only one BroadcastTo node among its inputs.
+int32_t get_broadcastTo_index_among_inputs_of(luci::CircleCustom *cop)
+{
+ for (uint32_t idx = 0; idx < cop->numInputs(); idx++)
+ {
+ auto input = dynamic_cast<const luci::CircleCustomOut *>(cop->inputs(idx));
+ if (input)
+ {
+ // NOTE(review): must_cast assumes a CircleCustomOut is always fed by a
+ // CircleCustom — confirm the IR guarantees this invariant.
+ auto broadcastTo = loco::must_cast<luci::CircleCustom *>(input->input());
+ if (broadcastTo->custom_code() == "BroadcastTo")
+ return idx;
+ }
+ }
+
+ return -1;
+}
+
+/** BEFORE
+ * [CircleConst]
+ * |
+ * [CircleNode] [BroadcastTo(CircleCustom)]
+ * \ |
+ * \ [CircleCustomOut]
+ * \ /
+ * [AddV2(CircleCustom)]
+ * AFTER
+ *
+ * [CircleConst] [CircleNode]
+ * \ /
+ * \ /
+ * [CircleAdd]
+ */
+bool resolve_with_BroadcastTo(luci::CircleCustom *addv2)
+{
+ int32_t broadcastTo_idx = get_broadcastTo_index_among_inputs_of(addv2);
+
+ // No BroadcastTo feeding this AddV2 -> nothing to fold here
+ if (broadcastTo_idx == -1)
+ return false;
+
+ auto input = loco::must_cast<const luci::CircleCustomOut *>(addv2->inputs(broadcastTo_idx));
+ auto broadcastTo = loco::must_cast<luci::CircleCustom *>(input->input());
+
+ // Build a plain CircleAdd that skips the BroadcastTo: CircleAdd broadcasts
+ // implicitly, so the pre-broadcast operand is wired in directly.
+ auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
+ add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ add->x(addv2->inputs(1 - broadcastTo_idx));
+ add->y(broadcastTo->inputs(0));
+ // Replace the AddV2's single CircleCustomOut with the new CircleAdd
+ auto customOut = loco::succs(addv2);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(add);
+
+ return true;
+}
+
+// Replace an "AddV2" custom op with a built-in CircleAdd (no activation).
+// Tries the BroadcastTo-folding form first, then the plain two-input form.
+bool resolve_custom_op(luci::CircleCustom *addv2)
+{
+ const std::string custom_code = addv2->custom_code();
+ // NOTE(review): custom_options is read but never used in this function
+ const std::vector<uint8_t> custom_options = addv2->custom_options();
+
+ if (custom_code != "AddV2")
+ return false;
+
+ if (resolve_with_BroadcastTo(addv2))
+ return true;
+
+ auto add = addv2->graph()->nodes()->create<luci::CircleAdd>();
+ add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ add->x(addv2->inputs(0));
+ add->y(addv2->inputs(1));
+ // The custom op's single CircleCustomOut is the node consumers see; swap it
+ auto customOut = loco::succs(addv2);
+ assert(customOut.size() == 1);
+ replace(*customOut.begin()).with(add);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Scan all active nodes and rewrite every "AddV2" CircleCustom into CircleAdd.
+// Returns true when at least one node was rewritten.
+bool ResolveCustomOpAddPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ changed |= resolve_custom_op(cop);
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
new file mode 100644
index 000000000..145e9cb62
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
+
+#include "flatbuffers/flexbuffers.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+// Replace a "BatchMatMulV2" custom op with the built-in CircleBatchMatMul,
+// carrying over the adj_x/adj_y attributes from the flexbuffer options.
+bool resolve_custom_op(luci::CircleCustom *cop)
+{
+ const std::string custom_code = cop->custom_code();
+ const std::vector<uint8_t> custom_options = cop->custom_options();
+
+ if (custom_code == "BatchMatMulV2")
+ {
+ auto batch_matmul = cop->graph()->nodes()->create<luci::CircleBatchMatMul>();
+ // input
+ batch_matmul->x(cop->inputs(0));
+ batch_matmul->y(cop->inputs(1));
+ // TODO find much better way of parsing custom_options
+ // adj
+ auto map = flexbuffers::GetRoot(custom_options).AsMap();
+ batch_matmul->adj_x(map["adj_x"].AsBool());
+ batch_matmul->adj_y(map["adj_y"].AsBool());
+
+ replace(cop).with(batch_matmul);
+ return true;
+ }
+ return false;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Scan all active nodes and rewrite every "BatchMatMulV2" CircleCustom into
+// CircleBatchMatMul. Returns true when at least one node was rewritten.
+bool ResolveCustomOpBatchMatMulPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ changed |= resolve_custom_op(cop);
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
new file mode 100644
index 000000000..547fd22fc
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpMatMulPass.h"
+
+#include "flatbuffers/flexbuffers.h"
+#include <loco/IR/DataTypeTraits.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include <loco.h>
+#include <oops/InternalExn.h>
+#include <loco/Service/ShapeInference.h>
+#include <loco/Service/TypeInference.h>
+
+namespace
+{
+
+// Build a CircleConst of the given dtype/shape, filled element-wise from
+// `values` (values are converted to the tensor's dtype on assignment).
+// Throws via INTERNAL_EXN for dtypes other than U8/S16/S32/FLOAT32.
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+
+// Allocate `size` elements, then copy in the provided values
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+// Rewrite a "MatMul" custom op into CircleFullyConnected, inserting
+// CircleTranspose nodes as needed:
+//  - lhs is transposed when transpose_a is requested;
+//  - rhs is transposed when transpose_b is NOT set, because FullyConnected
+//    expects its weights in column-major (transposed) layout.
+// Returns false (no change) when required shape/type info is unknown.
+bool resolve_matmul(luci::CircleCustom *cop)
+{
+#define CHECK_OR_FALSE(condition) \
+ if (not(condition)) \
+ return false;
+#define CHECK_OR_THROW(condition, message) \
+ if (not(condition)) \
+ INTERNAL_EXN(message);
+
+ auto graph = cop->graph();
+ const std::vector<uint8_t> custom_options = cop->custom_options();
+ auto map = flexbuffers::GetRoot(custom_options).AsMap();
+ const auto U8 = loco::DataType::U8;
+ const auto S16 = loco::DataType::S16;
+ const auto S32 = loco::DataType::S32;
+ const auto FLOAT32 = loco::DataType::FLOAT32;
+
+ bool transpose_a = map["transpose_a"].AsBool();
+ bool transpose_b = map["transpose_b"].AsBool();
+
+ loco::Node *lhs = cop->inputs(0);
+ loco::Node *rhs = cop->inputs(1);
+
+ // Check that the type of the first input is known
+ CHECK_OR_FALSE(loco::dtype_known(lhs));
+ auto lhs_dtype = loco::dtype_get(cop->inputs(0));
+
+ // If transpose of first input is requested, its shape must be known
+ CHECK_OR_FALSE(!transpose_a || loco::shape_known(lhs));
+ // and its rank should be at least 2
+ CHECK_OR_FALSE(!transpose_a || loco::shape_get(lhs).as<loco::TensorShape>().rank() >= 2);
+ // Check that the shape of the 2nd input is known
+ CHECK_OR_FALSE(loco::shape_known(rhs));
+ // TODO as of 06/23/20 TFLite only supports rank 2 for 2nd input. Fix this once that changes!
+ CHECK_OR_FALSE(loco::shape_get(rhs).as<loco::TensorShape>().rank() == 2);
+ // Check that input data type is supported
+ CHECK_OR_THROW(lhs_dtype == U8 || lhs_dtype == S16 || lhs_dtype == FLOAT32,
+ "Only UInt8, Int16 and Float32 data types are supported by MatMul");
+
+ if (transpose_a)
+ {
+ auto a_shape = loco::shape_get(lhs).as<loco::TensorShape>();
+ // Create a permutation constant node that swaps the last two dims
+ std::vector<uint32_t> perm;
+ for (uint32_t i = 0; i < a_shape.rank(); ++i)
+ perm.push_back(i);
+ std::swap(perm[a_shape.rank() - 1], perm[a_shape.rank() - 2]);
+ auto perm_node = create_const_node(graph, S32, {a_shape.rank()}, perm);
+ // Now make a transpose node
+ auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
+ transpose_node->a(lhs);
+ transpose_node->perm(perm_node);
+ lhs = transpose_node;
+ }
+
+ // Transpose the second input if needed. TFLite FullyConnected operator
+ // assumes the second input is in column-major order, but the input is
+ // in row-major order, thus we need to convert between them.
+ if (!transpose_b)
+ {
+ const std::vector<uint32_t> perm{1, 0};
+ auto perm_node = create_const_node(graph, S32, {2}, perm);
+ auto transpose_node = graph->nodes()->create<luci::CircleTranspose>();
+ transpose_node->a(rhs);
+ transpose_node->perm(perm_node);
+ rhs = transpose_node;
+ }
+
+ // Make a constant zero-filled bias node
+ // NOTE(review): the bias dtype is set to lhs_dtype — for quantized (U8/S16)
+ // inputs, confirm FullyConnected does not instead expect an S32 bias.
+ auto b_shape = loco::shape_get(cop->inputs(1)).as<loco::TensorShape>();
+ uint32_t bias_size = b_shape.dim(transpose_b ? 1 : 0).value();
+ const std::vector<float> val(bias_size, .0f);
+ auto bias_node = create_const_node(graph, lhs_dtype, {bias_size}, val);
+ auto fc_node = graph->nodes()->create<luci::CircleFullyConnected>();
+ fc_node->input(lhs);
+ fc_node->weights(rhs);
+ fc_node->bias(bias_node);
+ fc_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+ replace(cop).with(fc_node);
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Scan all active nodes and rewrite every "MatMul" CircleCustom into a
+// CircleFullyConnected subgraph. Returns true when at least one node changed.
+bool ResolveCustomOpMatMulPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ if (cop->custom_code() != "MatMul")
+ continue;
+
+ // resolve_matmul may decline (unknown shape/type) without error
+ if (!resolve_matmul(cop))
+ continue;
+
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci