diff options
Diffstat (limited to 'compiler/luci/pass/src/QuantizationUtils.cpp')
-rw-r--r-- | compiler/luci/pass/src/QuantizationUtils.cpp | 158 |
1 file changed, 119 insertions, 39 deletions
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp index 2f6fed46e..ad86cedf4 100644 --- a/compiler/luci/pass/src/QuantizationUtils.cpp +++ b/compiler/luci/pass/src/QuantizationUtils.cpp @@ -33,43 +33,6 @@ bool is_quantized(const CircleNode *node) node->dtype() == loco::DataType::S64); // bias (int16 quant) } -// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer -bool is_weights(CircleNode *node) -{ - auto circle_const = dynamic_cast<CircleConst *>(node); - if (circle_const == nullptr) - return false; - - auto succs = loco::succs(node); - - // Node is weights if it is the weights of all of its successors - for (auto out : succs) - { - bool is_weights = false; - - auto conv = dynamic_cast<CircleConv2D *>(out); - if (conv != nullptr && conv->filter() == circle_const) - is_weights = true; - - auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out); - if (dw_conv != nullptr && dw_conv->filter() == circle_const) - is_weights = true; - - auto t_conv = dynamic_cast<CircleTransposeConv *>(out); - if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4) - is_weights = true; - - auto fc = dynamic_cast<CircleFullyConnected *>(out); - if (fc != nullptr && fc->weights() == circle_const) - is_weights = true; - - if (!is_weights) - return false; - } - - return true; -} - uint8_t fp32_to_uint8_cast(float f) { assert(std::numeric_limits<uint8_t>::min() <= f); @@ -77,7 +40,6 @@ uint8_t fp32_to_uint8_cast(float f) return static_cast<uint8_t>(f); } -// Per-layer quantization of weights (const tensor) using given min/max values void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max) @@ -107,7 +69,6 @@ void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float } } -// Per-layer quantization of weights (const tensor) using given min/max values void 
symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max, float &scaling_factor, int64_t &zp, float &nudged_min, float &nudged_max) @@ -315,4 +276,123 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices) indices[2] * dimension.dim(3).value() + indices[3]; } +ActivationQType activation_qtype(const CircleNode *node) +{ + auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node); + if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH) + return ActivationQType::PreDefinedValue; + + switch (node->opcode()) + { + case CircleOpcode::LOGISTIC: + case CircleOpcode::TANH: + case CircleOpcode::SOFTMAX: + return ActivationQType::PreDefinedValue; + case CircleOpcode::FLOOR: + case CircleOpcode::FLOOR_DIV: + case CircleOpcode::FLOOR_MOD: + case CircleOpcode::CEIL: + return ActivationQType::IntScale; + default: + break; + } + + return ActivationQType::MinMax; +} + +std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype) +{ + auto qparam = std::make_unique<CircleQuantParam>(); + + auto set_qparam = [&qparam](float scale, int64_t zp) { + qparam->scale.emplace_back(scale); + qparam->zerop.emplace_back(zp); + }; + + switch (opcode) + { + case CircleOpcode::LOGISTIC: + if (dtype == loco::DataType::U8) + set_qparam(1.0f / 256.0f, 0); + else + { + assert(dtype == loco::DataType::S16); + set_qparam(1.0f / 32768.0f, 0); + } + break; + case CircleOpcode::TANH: + if (dtype == loco::DataType::U8) + set_qparam(2.0f / 256.0f, 128); + else + { + assert(dtype == loco::DataType::S16); + set_qparam(1.0f / 32768.0f, 0); + } + break; + case CircleOpcode::SOFTMAX: + if (dtype == loco::DataType::U8) + set_qparam(1.0f / 255.0f, 0); + else + { + assert(dtype == loco::DataType::S16); + set_qparam(1.0f / 32767.0f, 0); + } + break; + default: + throw std::runtime_error("Unsupported opcode with pre-defined qparam"); + } + return std::move(qparam); +} + 
+// For nodes with integer output, we use integer scale +void set_int_scale(luci::CircleNode *node) +{ + assert(node); // FIX_CALLER_UNLESS + + auto qparam = node->quantparam(); + assert(qparam); // FIX_CALLER_UNLESS + assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS + + auto fp_scale = qparam->scale[0]; + qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale); +} + +void quant_const(luci::CircleConst *node, loco::DataType quant_type) +{ + assert(node->dtype() == loco::DataType::FLOAT32); + + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++) + { + auto data = node->at<loco::DataType::FLOAT32>(i); + min = data < min ? data : min; + max = data > max ? data : max; + } + + float scaling_factor{0.0}; + int64_t zp{0}; + float nudged_min{0.0}; + float nudged_max{0.0}; + + switch (quant_type) + { + case loco::DataType::U8: + asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min, + nudged_max); + break; + case loco::DataType::S16: + symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min, + nudged_max); + break; + default: + throw std::runtime_error("Unsupported data type"); + } + + auto quantparam = std::make_unique<luci::CircleQuantParam>(); + quantparam->scale.push_back(scaling_factor); + quantparam->zerop.push_back(zp); + node->quantparam(std::move(quantparam)); +} + } // namespace luci |