Diffstat (limited to 'compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def')
-rw-r--r-- compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def | 185
1 file changed, 185 insertions(+), 0 deletions(-)
diff --git a/compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def b/compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def
new file mode 100644
index 000000000..e38277802
--- /dev/null
+++ b/compiler/nnc/backends/soft_backend/code_snippets/cpp_reduce.def
@@ -0,0 +1,185 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+
+// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
+// This method iterates through the input data and reduces elements along the
+// dimensions given in axis.
+template <typename In, typename Out>
+inline bool Reduce(const In* input_data, const int* input_dims,
+ const int* output_dims, const int input_num_dims,
+ const int output_num_dims, const int* axis,
+ const int num_axis, int* input_iter,
+ Out reducer(const Out current, const In in),
+ Out* output_data) {
+ // Reset input iterator.
+ for (int idx = 0; idx < input_num_dims; ++idx) {
+ input_iter[idx] = 0;
+ }
+ // Iterate through input_data.
+ do {
+ size_t input_offset =
+ ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
+ size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
+ input_iter, num_axis, axis);
+ output_data[output_offset] =
+ reducer(output_data[output_offset], input_data[input_offset]);
+ } while (NextIndex(input_num_dims, input_dims, input_iter));
+ return true;
+}
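+
+// Illustrative sketch (an editor-added example, not part of the upstream
+// TFLite source): a minimal use of Reduce computing per-row sums of a 2x3
+// tensor. It assumes ReducedOutputOffset and NextIndex are provided by the
+// neighbouring code snippets, and the function name is hypothetical.
+inline void ExampleReduceRowSum() {
+  const int input_dims[] = {2, 3};
+  const int output_dims[] = {2};
+  const int axis[] = {1};
+  int iter[2];
+  const float in[] = {1, 2, 3, 4, 5, 6};
+  float out[] = {0.f, 0.f};  // Reduce accumulates, so out must start at zero.
+  Reduce<float, float>(in, input_dims, output_dims, /*input_num_dims=*/2,
+                       /*output_num_dims=*/1, axis, /*num_axis=*/1, iter,
+                       [](const float cur, const float v) -> float {
+                         return cur + v;
+                       },
+                       out);
+  // out == {6, 15}
+}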
+
+inline bool ResolveAxis(const int num_dims, const int* axis,
+ const int64_t num_axis, int* out_axis,
+ int* out_num_axis) {
+ *out_num_axis = 0; // Just in case.
+ // Short-circuit axis resolution for scalars; the axis will go unused.
+ if (num_dims == 0) {
+ return true;
+ }
+ // O(n^2) is fine since out_num_axis should be small, typically <= 4.
+ for (int64_t idx = 0; idx < num_axis; ++idx) {
+ // Handle negative index.
+ int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
+ TFLITE_DCHECK(current >= 0 && current < num_dims);
+ bool is_dup = false;
+ for (int j = 0; j < *out_num_axis; ++j) {
+ if (out_axis[j] == current) {
+ is_dup = true;
+ break;
+ }
+ }
+ if (!is_dup) {
+ out_axis[*out_num_axis] = current;
+ *out_num_axis += 1;
+ }
+ }
+ return true;
+}
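+
+// Illustrative sketch (an editor-added example, not part of the upstream
+// source): ResolveAxis folds negative indices and drops duplicates. For a
+// rank-3 tensor, the request {-1, 2, 0} resolves to {2, 0}.
+inline void ExampleResolveAxis() {
+  const int axis[] = {-1, 2, 0};
+  int resolved[3];
+  int num_resolved = 0;
+  ResolveAxis(/*num_dims=*/3, axis, /*num_axis=*/3, resolved, &num_resolved);
+  // num_resolved == 2 and resolved == {2, 0}: -1 maps to 2, the explicit 2
+  // is then a duplicate, and 0 is kept as-is.
+}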
+
+// This method expects that output_data has been initialized.
+template <typename In, typename Out>
+inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
+ const int* output_dims, const int input_num_dims,
+ const int output_num_dims, const int* axis,
+ const int num_axis, int* input_iter,
+ Out* output_data) {
+ auto reducer = [](const Out current, const In in) -> Out {
+ const Out actual_in = static_cast<Out>(in);
+ return current + actual_in;
+ };
+ return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
+ output_num_dims, axis, num_axis, input_iter, reducer,
+ output_data);
+}
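+
+// Illustrative sketch (an editor-added example, not part of the upstream
+// source): the In/Out split lets a narrow input accumulate into a wider type.
+// Here uint8_t values sum into int32_t, avoiding wrap-around; <cstdint> is
+// assumed to be included by the surrounding generated code.
+inline void ExampleReduceSum() {
+  const int input_dims[] = {2, 2};
+  const int output_dims[] = {2};
+  const int axis[] = {0};
+  const uint8_t in[] = {250, 1, 10, 2};
+  int32_t out[] = {0, 0};  // Precondition: output initialized before the call.
+  int iter[2];
+  ReduceSumImpl<uint8_t, int32_t>(in, input_dims, output_dims, 2, 1, axis, 1,
+                                  iter, out);
+  // out == {260, 3}
+}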
+
+template <typename T>
+inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
+ const T init_value, T* data) {
+ size_t num_elements = 1;
+ for (int idx = 0; idx < num_dims; ++idx) {
+ size_t current = static_cast<size_t>(dims[idx]);
+ // Overflow prevention.
+ if (num_elements > std::numeric_limits<size_t>::max() / current) {
+ return false;
+ }
+ num_elements *= current;
+ }
+ for (size_t idx = 0; idx < num_elements; ++idx) {
+ data[idx] = init_value;
+ }
+ return true;
+}
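+
+// Illustrative sketch (an editor-added example, not part of the upstream
+// source): typical use is to reset the output buffer before accumulating;
+// the checked multiply above returns false instead of wrapping around when
+// the flat element count would overflow size_t.
+inline void ExampleInitForReduce() {
+  const int dims[] = {2, 3};
+  float buf[6];
+  InitTensorDataForReduce<float>(dims, /*num_dims=*/2, /*init_value=*/0.f, buf);
+  // buf is now all zeros and ready for Reduce or ReduceSumImpl to accumulate.
+}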
+
+// Computes a generic reduction (e.g., sum/max/min/prod) of elements across
+// the dimensions given in axis. The caller must pass in init_value and reducer.
+template <typename T>
+inline bool ReduceGeneric(const T* input_data, const int* input_dims,
+ const int input_num_dims, T* output_data,
+ const int* output_dims, const int output_num_dims,
+ const int* axis, const int64_t num_axis_dimensions,
+ bool keep_dims, int* temp_index, int* resolved_axis,
+ T init_value,
+ T reducer(const T current, const T in)) {
+ // Reset output data.
+ if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
+ output_data)) {
+ return false;
+ }
+
+ // Resolve axis.
+ int num_resolved_axis = 0;
+ if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+ &num_resolved_axis)) {
+ return false;
+ }
+
+ return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
+ output_num_dims, resolved_axis, num_resolved_axis,
+ temp_index, reducer, output_data);
+}
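+
+// Illustrative sketch (an editor-added example, not part of the upstream
+// source): ReduceGeneric instantiated as a reduce_max over axis 0 of a 2x3
+// tensor. temp_index and resolved_axis are caller-provided scratch buffers
+// sized to the input and axis ranks.
+inline void ExampleReduceMax() {
+  const float in[] = {1, 5, 2, 4, 3, 6};
+  const int input_dims[] = {2, 3};
+  const int output_dims[] = {3};
+  const int axis[] = {0};
+  float out[3];
+  int temp_index[2];
+  int resolved_axis[1];
+  ReduceGeneric<float>(in, input_dims, 2, out, output_dims, 1, axis, 1,
+                       /*keep_dims=*/false, temp_index, resolved_axis,
+                       std::numeric_limits<float>::lowest(),
+                       [](const float cur, const float v) -> float {
+                         return v > cur ? v : cur;
+                       });
+  // out == {4, 5, 6}: the column-wise maxima.
+}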
+
+// Computes the mean of elements across the dimensions given in axis.
+// It does so in two stages: first it calculates the sum of elements along the
+// axis, then divides that sum by the number of elements along the axis.
+template <typename T, typename U>
+inline bool Mean(const T* input_data, const int* input_dims,
+ const int input_num_dims, T* output_data,
+ const int* output_dims, const int output_num_dims,
+ const int* axis, const int num_axis_dimensions, bool keep_dims,
+ int* temp_index, int* resolved_axis, U* temp_sum) {
+ // Reset output data.
+ size_t num_outputs = 1;
+ for (int idx = 0; idx < output_num_dims; ++idx) {
+ size_t current = static_cast<size_t>(output_dims[idx]);
+ // Overflow prevention.
+ if (num_outputs > std::numeric_limits<size_t>::max() / current) {
+ return false;
+ }
+ num_outputs *= current;
+ }
+ for (size_t idx = 0; idx < num_outputs; ++idx) {
+ output_data[idx] = T();
+ temp_sum[idx] = U();
+ }
+
+ // Resolve axis.
+ int num_resolved_axis = 0;
+ if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
+ &num_resolved_axis)) {
+ return false;
+ }
+
+ if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
+ output_num_dims, resolved_axis, num_resolved_axis,
+ temp_index, temp_sum)) {
+ return false;
+ }
+
+ // Calculate the mean by dividing each sum by the number of aggregated elements.
+ U num_elements_in_axis = 1;
+ for (int idx = 0; idx < num_resolved_axis; ++idx) {
+ size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
+ // Overflow prevention.
+ if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
+ return false;
+ }
+ num_elements_in_axis *= current;
+ }
+
+ if (num_elements_in_axis > 0) {
+ for (size_t idx = 0; idx < num_outputs; ++idx) {
+ output_data[idx] =
+ static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
+ }
+ }
+ return true;
+}
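+
+// Illustrative sketch (an editor-added example, not part of the upstream
+// source): Mean over axis 1 of a 2x3 float tensor, accumulating in double
+// via temp_sum to limit rounding error before the final division.
+inline void ExampleMean() {
+  const float in[] = {1, 2, 3, 4, 5, 6};
+  const int input_dims[] = {2, 3};
+  const int output_dims[] = {2};
+  const int axis[] = {1};
+  float out[2];
+  double temp_sum[2];
+  int temp_index[2];
+  int resolved_axis[1];
+  Mean<float, double>(in, input_dims, 2, out, output_dims, 1, axis, 1,
+                      /*keep_dims=*/false, temp_index, resolved_axis, temp_sum);
+  // out == {2.f, 5.f}: (1+2+3)/3 and (4+5+6)/3.
+}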