summaryrefslogtreecommitdiff
path: root/onert-micro/luci-interpreter/pal/common/PALLogistic.h
diff options
context:
space:
mode:
Diffstat (limited to 'onert-micro/luci-interpreter/pal/common/PALLogistic.h')
-rw-r--r--onert-micro/luci-interpreter/pal/common/PALLogistic.h156
1 files changed, 156 insertions, 0 deletions
diff --git a/onert-micro/luci-interpreter/pal/common/PALLogistic.h b/onert-micro/luci-interpreter/pal/common/PALLogistic.h
new file mode 100644
index 000000000..c0e3a3c18
--- /dev/null
+++ b/onert-micro/luci-interpreter/pal/common/PALLogistic.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGISTIC_H
+#define LUCI_INTERPRETER_PAL_LOGISTIC_H
+
+#include "Params.h"
+#include "PALUtils.h"
+
+namespace luci_interpreter_pal
+{
+
+// Element-wise float logistic: output_data[i] = 1 / (1 + exp(-input_data[i])) for
+// i in [0, flat_size), with both tails replaced by cheaper closed forms.
+//
+// Rationale for using the approximation in the reference kernel:
+// 0. It gives enough precision for float.
+// 1. It works around an issue on an embedded chipset where exp() does not
+//    return as expected - exp(x) should return inf when overflown, not
+//    1.701417; IEEE 754 defines a representation for inf.
+// 2. It speeds up the calculation and matches the behavior of the optimized
+//    kernels (check the definition of scalar_logistic_op<float>).
+inline void Logistic(const int flat_size, const float *input_data, float *output_data)
+{
+  // Inputs above this bound are written as exactly 1.
+  const float saturation_bound = 16.619047164916992188f;
+  // Inputs below this bound are computed as exp(x) alone.
+  const float exp_only_bound = -9.f;
+
+  for (int idx = 0; idx < flat_size; ++idx)
+  {
+    const float x = input_data[idx];
+
+    if (x > saturation_bound)
+    {
+      output_data[idx] = 1.0f;
+      continue;
+    }
+
+    // A NaN input fails both comparisons and goes through the full formula.
+    const bool use_exp_only = x < exp_only_bound;
+    output_data[idx] = use_exp_only ? std::exp(x) : 1.f / (1.f + std::exp(-x));
+  }
+}
+
+// Quantized (int8) logistic computed through float. Each of the flat_size
+// elements is dequantized with (input_scale, input_zero_point), passed through
+// a piecewise sigmoid approximation, then requantized with
+// (output_scale, output_zero_point) and saturated to the int8 range.
+//
+// Rationale for using the approximation in the reference kernel:
+// 0. It gives enough precision for float.
+// 1. It works around an issue on an embedded chipset where exp() does not
+//    return as expected - exp(x) should return inf when overflown, not
+//    1.701417; IEEE 754 defines a representation for inf.
+// 2. It speeds up the calculation and matches the behavior of the optimized
+//    kernels (check the definition of scalar_logistic_op<float>).
+inline void Logistic(const int flat_size, const int8_t *input_data, float input_scale,
+                     int input_zero_point, int8_t *output_data, float output_scale,
+                     int output_zero_point)
+{
+  const float cutoff_upper = 16.619047164916992188f;
+  const float cutoff_lower = -9.f;
+  // Bounds of int8_t as floats, used to saturate the requantized value.
+  const float int8_lowest = -128.f;
+  const float int8_highest = 127.f;
+
+  for (int i = 0; i < flat_size; i++)
+  {
+    // Dequantize.
+    const float val = static_cast<float>((input_data[i] - input_zero_point) * input_scale);
+    float result;
+    if (val > cutoff_upper)
+    {
+      result = 1.0f;
+    }
+    else if (val < cutoff_lower)
+    {
+      result = std::exp(val);
+    }
+    else
+    {
+      result = 1.f / (1.f + std::exp(-val));
+    }
+
+    // Requantize. Saturate before the cast: converting a float whose truncated
+    // value does not fit in int8_t (or a NaN) is undefined behavior. For example,
+    // result 1.0 with output_scale 1/256 and output_zero_point -128 yields 128.
+    // Values already in range keep the truncating conversion unchanged.
+    float quantized = result / output_scale + output_zero_point;
+    if (!(quantized >= int8_lowest))
+    {
+      // Also taken for NaN, since every comparison with NaN is false.
+      quantized = int8_lowest;
+    }
+    else if (quantized > int8_highest)
+    {
+      quantized = int8_highest;
+    }
+    output_data[i] = static_cast<int8_t>(quantized);
+  }
+}
+
+// int16 logistic using a lookup table with linear interpolation.
+//
+// Writes input_size results to ptr_output_data, one per element of ptr_input_data.
+// Each input is first rescaled as (x * input_multiplier + round) >> input_left_shift.
+// input_multiplier == 0 selects the power-of-two case, which is handled as a
+// multiplier of 3 << input_left_shift with no further shift.
+//
+// Notes carried over from the reference implementation:
+// - The LUT is for sigmoid, taking into account that tanh(x) = 2*sigmoid(2*x) - 1.
+// - Inputs are scaled by 3/4 to expand the range [-8,8]->[-10.7,10.7]. In case of a
+//   general parameter scale, the multiplier 3 is taken into account in the
+//   TanhPrepare function and is already included in input_multiplier.
+inline void Logistic(int32_t input_multiplier, int32_t input_left_shift, int32_t input_size,
+                     const int16_t *ptr_input_data, int16_t *ptr_output_data)
+{
+  if (input_multiplier == 0)
+  { // power of two case
+    input_multiplier = 3 << input_left_shift;
+    input_left_shift = 0;
+  }
+
+  // Rounding term for the right shift below; zero when there is no shift.
+  int32_t rounding = 0;
+  if (input_left_shift > 0)
+  {
+    rounding = 1 << (input_left_shift - 1);
+  }
+
+  for (int idx = 0; idx < input_size; ++idx)
+  {
+    const int32_t product = ptr_input_data[idx] * input_multiplier;
+    const int32_t scaled = (product + rounding) >> input_left_shift;
+
+    // Interpolation is done on unsigned values.
+    const uint32_t magnitude = abs(scaled);
+
+    // Divide by 2^9: 2^7 for the input conversion plus 1/4 from the scale above.
+    // The index is kept as uint32_t so that this function does not overflow.
+    const uint32_t table_index = magnitude >> 9;
+
+    uint32_t acc;
+    if (table_index < 255)
+    {
+      const uint32_t lower = sigmoid_table_uint16[table_index];
+      const uint32_t upper = sigmoid_table_uint16[table_index + 1];
+      const uint32_t fraction = magnitude & 0x1ff;
+      // Interpolate between the two table entries using the fractional bits.
+      acc = (lower << 9) + fraction * (upper - lower);
+    }
+    else
+    {
+      // Saturate to maximum.
+      acc = 0x7FFF << 10;
+    }
+
+    // Non-negative inputs use the interpolated value (plus a rounding half);
+    // negative inputs use its mirror image around 1 << (16 + 9).
+    if (scaled >= 0)
+    {
+      acc = acc + (1 << 9);
+    }
+    else
+    {
+      acc = (1 << (16 + 9)) - acc + (1 << 9) - 1;
+    }
+
+    // Back to 16-bit.
+    acc >>= 10;
+
+    ptr_output_data[idx] = acc;
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGISTIC_H