summaryrefslogtreecommitdiff
path: root/compute/cker/include/cker/operation/optimized/AveragePool.h
diff options
context:
space:
mode:
Diffstat (limited to 'compute/cker/include/cker/operation/optimized/AveragePool.h')
-rw-r--r--compute/cker/include/cker/operation/optimized/AveragePool.h105
1 files changed, 105 insertions, 0 deletions
diff --git a/compute/cker/include/cker/operation/optimized/AveragePool.h b/compute/cker/include/cker/operation/optimized/AveragePool.h
new file mode 100644
index 000000000..d94a5811a
--- /dev/null
+++ b/compute/cker/include/cker/operation/optimized/AveragePool.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
+#define __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
+
+#if defined(CKER_OPTIMIZED_EIGEN)
+
+#include "cker/eigen/Utils.h"
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace optimized
+{
+
+// TODO Change to apply neon for this function if it is faster
+/**
+ * @brief Float average pooling over a 4-D input, accumulated through Eigen matrix views.
+ *
+ * Rather than gathering a window per output position, every input position is scattered into
+ * each output position whose pooling window contains it. A per-output counter records how many
+ * inputs were added, so each output is divided by the number of real inputs it received
+ * (padding positions are never visited and therefore never counted).
+ *
+ * @param params       Supplies strides, filter size, padding offsets and the float activation
+ *                     bounds applied to the final result.
+ * @param input_shape  Shape of the input; must have 4 dimensions. Dimension 0 is matched against
+ *                     the output's dimension 0 via MatchingDim; dimensions 1 and 2 are used as
+ *                     height and width.
+ * @param input_data   Input buffer, viewed as a matrix with one column per (batch, h, w) position.
+ *                     NOTE(review): presumably NHWC with channels as the matrix rows - confirm
+ *                     against MapAsMatrixWithLastDimAsRows.
+ * @param output_shape Shape of the output; must have 4 dimensions.
+ * @param output_data  Output buffer; fully overwritten (zeroed, accumulated, averaged, clamped).
+ */
+inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
+                        const Shape &output_shape, float *output_data)
+{
+  assert(input_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int in_h = input_shape.Dims(1);
+  const int in_w = input_shape.Dims(2);
+  const int out_h = output_shape.Dims(1);
+  const int out_w = output_shape.Dims(2);
+  const int step_h = params.stride_height;
+  const int step_w = params.stride_width;
+
+  // TODO(benoitjacob) make this a proper reference impl without Eigen!
+  const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
+  auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
+
+  // The output doubles as the running-sum accumulator, so it has to start from zero.
+  out_mat.setZero();
+  // One counter per output column: how many input columns were summed into it.
+  // TODO(benoitjacob) get rid of the dynamic memory allocation here!
+  Eigen::VectorXf contributions = Eigen::VectorXf::Zero(out_mat.cols());
+
+  for (int batch = 0; batch < batches; ++batch)
+  {
+    for (int in_y = 0; in_y < in_h; ++in_y)
+    {
+      for (int in_x = 0; in_x < in_w; ++in_x)
+      {
+        // Input coordinates shifted by the padding offsets.
+        const int padded_y = in_y + params.padding_values.height;
+        const int padded_x = in_x + params.padding_values.width;
+
+        // [out_y_begin, out_y_end) x [out_x_begin, out_x_end) is the set of output positions
+        // this input position contributes to.
+        int out_y_begin = 0;
+        if (padded_y >= params.filter_height)
+        {
+          out_y_begin = (padded_y - params.filter_height) / step_h + 1;
+        }
+        const int out_y_end = std::min(padded_y / step_h + 1, out_h);
+
+        int out_x_begin = 0;
+        if (padded_x >= params.filter_width)
+        {
+          out_x_begin = (padded_x - params.filter_width) / step_w + 1;
+        }
+        const int out_x_end = std::min(padded_x / step_w + 1, out_w);
+
+        // Add this input column into every output column in that range.
+        for (int out_y = out_y_begin; out_y < out_y_end; ++out_y)
+        {
+          for (int out_x = out_x_begin; out_x < out_x_end; ++out_x)
+          {
+            const int dst_col = NodeOffset(batch, out_y, out_x, out_h, out_w);
+            const int src_col = NodeOffset(batch, in_y, in_x, in_h, in_w);
+            out_mat.col(dst_col) += in_mat.col(src_col);
+            contributions(dst_col) += 1.0f;
+          }
+        }
+      }
+    }
+  }
+
+  // Turn the sums into averages; every output column must have received at least one input.
+  assert(contributions.minCoeff() > 0);
+  out_mat.array().rowwise() /= contributions.transpose().array();
+
+  // Clamp each averaged value to the fused activation range.
+  const int element_count = output_shape.FlatSize();
+  for (int idx = 0; idx < element_count; ++idx)
+  {
+    float &value = output_data[idx];
+    value = ActivationFunctionWithMinMax(value, params.float_activation_min,
+                                         params.float_activation_max);
+  }
+}
+
+} // namespace optimized
+} // namespace cker
+} // namespace nnfw
+
+#endif // defined(CKER_OPTIMIZED_EIGEN)
+
+#endif // __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__