summary | refs | log | tree | commit | diff
path: root/compiler/luci/pass/src/QuantizationUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/luci/pass/src/QuantizationUtils.cpp')
-rw-r--r--  compiler/luci/pass/src/QuantizationUtils.cpp | 158
1 file changed, 119 insertions, 39 deletions
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
index 2f6fed46e..ad86cedf4 100644
--- a/compiler/luci/pass/src/QuantizationUtils.cpp
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -33,43 +33,6 @@ bool is_quantized(const CircleNode *node)
node->dtype() == loco::DataType::S64); // bias (int16 quant)
}
-// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
-bool is_weights(CircleNode *node)
-{
-  // Only a constant tensor can act as a weights operand
-  auto circle_const = dynamic_cast<CircleConst *>(node);
-  if (circle_const == nullptr)
-    return false;
-
-  auto succs = loco::succs(node);
-
-  // Node is weights if it is the weights of all of its successors
-  for (auto out : succs)
-  {
-    bool is_weights = false;
-
-    auto conv = dynamic_cast<CircleConv2D *>(out);
-    if (conv != nullptr && conv->filter() == circle_const)
-      is_weights = true;
-
-    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
-    if (dw_conv != nullptr && dw_conv->filter() == circle_const)
-      is_weights = true;
-
-    // NOTE rank-4 check distinguishes a transpose-conv filter from its
-    // other (non-weight) constant inputs
-    auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
-    if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
-      is_weights = true;
-
-    auto fc = dynamic_cast<CircleFullyConnected *>(out);
-    if (fc != nullptr && fc->weights() == circle_const)
-      is_weights = true;
-
-    // Any successor that does not consume this const as its filter/weights
-    // disqualifies the node
-    if (!is_weights)
-      return false;
-  }
-
-  return true;
-}
-
uint8_t fp32_to_uint8_cast(float f)
{
assert(std::numeric_limits<uint8_t>::min() <= f);
@@ -77,7 +40,6 @@ uint8_t fp32_to_uint8_cast(float f)
return static_cast<uint8_t>(f);
}
-// Per-layer quantization of weights (const tensor) using given min/max values
void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max)
@@ -107,7 +69,6 @@ void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float
}
}
-// Per-layer quantization of weights (const tensor) using given min/max values
void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
float &scaling_factor, int64_t &zp, float &nudged_min,
float &nudged_max)
@@ -315,4 +276,123 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
indices[2] * dimension.dim(3).value() + indices[3];
}
+// Decide how the activation qparam of `node` should be determined:
+// pre-defined values, integer scale, or recorded min/max.
+ActivationQType activation_qtype(const CircleNode *node)
+{
+  // A fused TANH activation fixes the output range -> pre-defined qparam
+  const auto *with_fused_act =
+    dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+  if (with_fused_act != nullptr &&
+      with_fused_act->fusedActivationFunction() == FusedActFunc::TANH)
+    return ActivationQType::PreDefinedValue;
+
+  const auto op = node->opcode();
+
+  // Ops whose output range is known a priori -> pre-defined qparam
+  if (op == CircleOpcode::LOGISTIC || op == CircleOpcode::TANH || op == CircleOpcode::SOFTMAX)
+    return ActivationQType::PreDefinedValue;
+
+  // Ops that emit integer-valued outputs -> integer scale
+  if (op == CircleOpcode::FLOOR || op == CircleOpcode::FLOOR_DIV ||
+      op == CircleOpcode::FLOOR_MOD || op == CircleOpcode::CEIL)
+    return ActivationQType::IntScale;
+
+  // Everything else is quantized from observed min/max
+  return ActivationQType::MinMax;
+}
+
+// Create a quantization parameter with the pre-defined scale/zero-point
+// for ops whose output range is fixed (LOGISTIC, TANH, SOFTMAX).
+// @param opcode  op whose output is being quantized (must be one of the above)
+// @param dtype   target quantized dtype (U8 or S16)
+// @return        qparam holding a single scale/zerop pair
+// @throws std::runtime_error for any other opcode
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype)
+{
+  auto qparam = std::make_unique<CircleQuantParam>();
+
+  auto set_qparam = [&qparam](float scale, int64_t zp) {
+    qparam->scale.emplace_back(scale);
+    qparam->zerop.emplace_back(zp);
+  };
+
+  switch (opcode)
+  {
+    case CircleOpcode::LOGISTIC:
+      // logistic output is in [0, 1)
+      if (dtype == loco::DataType::U8)
+        set_qparam(1.0f / 256.0f, 0);
+      else
+      {
+        assert(dtype == loco::DataType::S16);
+        set_qparam(1.0f / 32768.0f, 0);
+      }
+      break;
+    case CircleOpcode::TANH:
+      // tanh output is in [-1, 1)
+      if (dtype == loco::DataType::U8)
+        set_qparam(2.0f / 256.0f, 128);
+      else
+      {
+        assert(dtype == loco::DataType::S16);
+        set_qparam(1.0f / 32768.0f, 0);
+      }
+      break;
+    case CircleOpcode::SOFTMAX:
+      // softmax output is in [0, 1]
+      if (dtype == loco::DataType::U8)
+        set_qparam(1.0f / 255.0f, 0);
+      else
+      {
+        assert(dtype == loco::DataType::S16);
+        set_qparam(1.0f / 32767.0f, 0);
+      }
+      break;
+    default:
+      throw std::runtime_error("Unsupported opcode with pre-defined qparam");
+  }
+  // NOTE return the local directly: `return std::move(qparam)` is a
+  // redundant move that inhibits NRVO (clang-tidy performance-move-const-arg)
+  return qparam;
+}
+
+// For nodes with integer output, we use integer scale
+void set_int_scale(luci::CircleNode *node)
+{
+  assert(node); // FIX_CALLER_UNLESS
+
+  auto qparam = node->quantparam();
+  assert(qparam);                    // FIX_CALLER_UNLESS
+  assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS
+
+  // Snap the floating-point scale to the nearest integer, flooring at 1.0
+  const auto float_scale = qparam->scale[0];
+  if (float_scale < 1)
+    qparam->scale[0] = 1.0f;
+  else
+    qparam->scale[0] = std::round(float_scale);
+}
+
+// Quantize a float32 const tensor per-layer into `quant_type` (U8 or S16),
+// deriving min/max from the tensor's own values and attaching the resulting
+// scale/zero-point to the node.
+void quant_const(luci::CircleConst *node, loco::DataType quant_type)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  // Scan the constant tensor for its value range
+  float lowest = std::numeric_limits<float>::max();
+  float highest = std::numeric_limits<float>::lowest();
+  const uint32_t num_elems = node->size<loco::DataType::FLOAT32>();
+  for (uint32_t idx = 0; idx < num_elems; ++idx)
+  {
+    const auto value = node->at<loco::DataType::FLOAT32>(idx);
+    if (value < lowest)
+      lowest = value;
+    if (value > highest)
+      highest = value;
+  }
+
+  float scaling_factor{0.0};
+  int64_t zp{0};
+  float nudged_min{0.0};
+  float nudged_max{0.0};
+
+  // U8 uses asymmetric quantization; S16 uses symmetric quantization
+  switch (quant_type)
+  {
+    case loco::DataType::U8:
+      asymmetric_wquant_with_minmax_per_layer(node, lowest, highest, scaling_factor, zp,
+                                              nudged_min, nudged_max);
+      break;
+    case loco::DataType::S16:
+      symmetric_wquant_with_minmax_per_layer(node, lowest, highest, scaling_factor, zp,
+                                             nudged_min, nudged_max);
+      break;
+    default:
+      throw std::runtime_error("Unsupported data type");
+  }
+
+  // Record the computed quantization parameters on the node
+  auto quantparam = std::make_unique<luci::CircleQuantParam>();
+  quantparam->scale.push_back(scaling_factor);
+  quantparam->zerop.push_back(zp);
+  node->quantparam(std::move(quantparam));
+}
+
} // namespace luci