summaryrefslogtreecommitdiff
path: root/compiler/luci-interpreter/src/kernels/Mean.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/luci-interpreter/src/kernels/Mean.cpp')
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.cpp175
1 files changed, 136 insertions, 39 deletions
diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp
index 2394e2c0e..8e65e0d6d 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mean.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
#include <stdexcept>
@@ -28,7 +28,7 @@ namespace luci_interpreter
namespace kernels
{
-static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params)
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
{
params->axis_count = num_axes;
for (int i = 0; i < num_axes; ++i)
@@ -42,7 +42,7 @@ static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *
}
// Returns the number of axes that will be reduced. Removes duplicates.
-static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims)
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
int reduction_count = num_axes;
for (int i = 0; i < num_axes; ++i)
@@ -63,7 +63,7 @@ static int getAxisReductionCount(const int *axes_data, int num_axes, int input_n
return reduction_count;
}
-static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes,
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
bool keep_dims)
{
int input_num_dims = input_shape.num_dims();
@@ -123,15 +123,22 @@ static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int
}
}
-Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
- : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+ params)
{
}
void Mean::configure()
{
- assert(input()->element_type() == output()->element_type());
- assert(axes()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
const Shape &input_shape = input()->shape();
int input_num_dims = input_shape.num_dims();
@@ -144,18 +151,28 @@ void Mean::configure()
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
- const bool need_temporaries =
- !(_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
- ((params.axis[0] == 1 && params.axis[1] == 2) ||
- (params.axis[0] == 2 && params.axis[1] == 1)));
- if (need_temporaries)
- {
- _temp_index =
- std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
- _resolved_axes =
- std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
- _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
- AffineQuantization{}, "");
+ _need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+ if (_need_temporaries)
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+ temp_sum->resize(output()->shape());
+ }
+ else
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->set_allocatable(false);
+ resolved_axes->set_allocatable(false);
+ temp_sum->set_allocatable(false);
}
}
@@ -169,6 +186,9 @@ void Mean::execute() const
case DataType::U8:
evalQuantized();
break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -184,6 +204,10 @@ void Mean::evalFloat() const
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -194,12 +218,12 @@ void Mean::evalFloat() const
}
else
{
- tflite::reference_ops::Mean(
- getTensorData<float>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<float>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+ tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<float>(temp_sum));
}
}
@@ -213,6 +237,10 @@ void Mean::evalQuantized() const
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -225,23 +253,92 @@ void Mean::evalQuantized() const
}
else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
{
- tflite::reference_ops::Mean(
- getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<uint8_t>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+ tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum));
}
else
{
tflite::reference_ops::QuantizedMeanOrSum<>(
- getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
- getTensorShape(input()).DimsData(), input()->shape().num_dims(),
- getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
- /*compute_sum=*/false);
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum),
+ /*compute_sum=*/false);
+ }
+}
+
+void Mean::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ const int num_axes = axes()->shape().num_elements();
+
+ constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
+ ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
+ {
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+ assert(output_shape.num_dims() == 4);
+ assert(output_shape.dim(0) == batches);
+ assert(output_shape.dim(1) == 1);
+ assert(output_shape.dim(2) == 1);
+ assert(output_shape.dim(3) == depth);
+
+ const double real_multiplier =
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const int32_t num_elements_in_axes = input_height * input_width;
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t c = 0; c < depth; ++c)
+ {
+ int32_t acc = 0;
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
+ }
+ }
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ // Divide by the number of elements rounding to the nearest integer.
+ scaled_acc = scaled_acc > 0
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+
+ scaled_acc = std::max(scaled_acc, output_min);
+ scaled_acc = std::min(scaled_acc, output_max);
+
+ output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported configuration.");
}
}