/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
#define __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__
#if defined(CKER_OPTIMIZED_EIGEN)
#include "cker/eigen/Utils.h"
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
#include <Eigen/Core>
namespace nnfw
{
namespace cker
{
namespace optimized
{
// TODO Switch this function to a NEON implementation if it proves faster
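// Average pooling over float NHWC tensors, expressed with Eigen by mapping
// the innermost (channel) dimension onto matrix columns. Rather than
// gathering inputs per output pixel, each input pixel is scatter-added into
// every output pixel whose filter window covers it, while out_count records
// how many inputs contributed to each output column; a final element-wise
// divide turns the sums into averages. Because the divisor is the actual
// contribution count, outputs near padded borders are averaged only over
// valid input pixels.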
inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
                        const Shape &output_shape, float *output_data)
{
  assert(input_shape.DimensionsCount() == 4);
  assert(output_shape.DimensionsCount() == 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  // TODO(benoitjacob) make this a proper reference impl without Eigen!
  const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
  auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
  // TODO(benoitjacob) get rid of the dynamic memory allocation here!
  Eigen::VectorXf out_count(out_mat.cols());
  out_count.setZero();
  // Prefill the output to 0.
  out_mat.setZero();
  for (int b = 0; b < batches; ++b)
  {
    for (int h = 0; h < input_height; ++h)
    {
      for (int w = 0; w < input_width; ++w)
      {
        // (h_start, h_end) * (w_start, w_end) is the range that the input
        // vector projects to.
        int hpad = h + params.padding_values.height;
        int wpad = w + params.padding_values.width;
        int h_start =
          (hpad < params.filter_height) ? 0 : (hpad - params.filter_height) / stride_height + 1;
        int h_end = std::min(hpad / stride_height + 1, output_height);
        int w_start =
          (wpad < params.filter_width) ? 0 : (wpad - params.filter_width) / stride_width + 1;
        int w_end = std::min(wpad / stride_width + 1, output_width);
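        // Derivation: output row ph covers input rows in the half-open range
        // [ph * stride_height - pad, ph * stride_height - pad + filter_height),
        // so input row h (with hpad = h + pad) lies in ph's window iff
        // ph * stride_height <= hpad < ph * stride_height + filter_height;
        // solving for ph yields the bounds computed above (likewise for pw).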
        // compute elementwise sum
        for (int ph = h_start; ph < h_end; ++ph)
        {
          for (int pw = w_start; pw < w_end; ++pw)
          {
            int out_offset = NodeOffset(b, ph, pw, output_height, output_width);
            out_mat.col(out_offset) += in_mat.col(NodeOffset(b, h, w, input_height, input_width));
            out_count(out_offset)++;
          }
        }
      }
    }
  }
  // Divide the output by the actual number of elements being averaged over
  assert(out_count.minCoeff() > 0);
  out_mat.array().rowwise() /= out_count.transpose().array();
  const int flat_size = output_shape.FlatSize();
  for (int i = 0; i < flat_size; ++i)
  {
    output_data[i] = ActivationFunctionWithMinMax(output_data[i], params.float_activation_min,
                                                  params.float_activation_max);
  }
}
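
// A minimal usage sketch (illustrative only, not compiled into this header),
// assuming Shape supports initializer-list construction as in TFLite's
// RuntimeShape; the PoolParams fields are the ones referenced above. It pools
// a 1x4x4x1 NHWC input with a 2x2 filter, stride 2, and zero padding into a
// 1x2x2x1 output, so each output element is the mean of one 2x2 block:
//
//   float input[16] = {/* 16 values in NHWC order */};
//   float output[4];
//   PoolParams params{};
//   params.filter_height = params.filter_width = 2;
//   params.stride_height = params.stride_width = 2;
//   params.padding_values.height = params.padding_values.width = 0;
//   params.float_activation_min = std::numeric_limits<float>::lowest();
//   params.float_activation_max = std::numeric_limits<float>::max();
//   AveragePool(params, Shape{1, 4, 4, 1}, input, Shape{1, 2, 2, 1}, output);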
} // namespace optimized
} // namespace cker
} // namespace nnfw
#endif // defined(CKER_OPTIMIZED_EIGEN)
#endif // __NNFW_CKER_OPTIMIZED_AVERAGE_POOL_H__