summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorАндрей Шедько/AI Tools Lab /SRR/Engineer/삼성전자 <a.shedko@samsung.com>2019-04-10 03:54:57 (GMT)
committer박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>2019-04-10 03:54:57 (GMT)
commitda96d8f8219abf6b5b2d59905c1cee1b0a0fbc81 (patch)
tree528cff322daf0ecfec12c0778c79886a1bde1461
parent8d2222f661cb7fcdc7c54aeec8012c61b3335b77 (diff)
downloadnnfw-da96d8f8219abf6b5b2d59905c1cee1b0a0fbc81.zip
nnfw-da96d8f8219abf6b5b2d59905c1cee1b0a0fbc81.tar.gz
nnfw-da96d8f8219abf6b5b2d59905c1cee1b0a0fbc81.tar.bz2
[NNFW] optimized Conv on CPU (#4964)
Moved the bounds check and offset calculation out of the innermost loop of the CPU convolution kernel. Achieved a speedup of more than 2x on MobileNet with neurun. Signed-off-by: Andrei Shedko <a.shedko@samsung.com>
-rw-r--r--libs/cker/include/cker/operation/Conv.h40
1 files changed, 20 insertions, 20 deletions
diff --git a/libs/cker/include/cker/operation/Conv.h b/libs/cker/include/cker/operation/Conv.h
index e494f0e..d232dbc 100644
--- a/libs/cker/include/cker/operation/Conv.h
+++ b/libs/cker/include/cker/operation/Conv.h
@@ -95,18 +95,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const float
{
for (int filter_x = 0; filter_x < filter_width; ++filter_x)
{
- for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // If the location is outside the bounds of the input image,
+ // use zero as a default value.
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
{
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+ const int in_offset = Offset(input_shape, batch, in_y, in_x, 0);
+ const int filter_offset = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
{
- float input_value =
- input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
- float filter_value = filter_data[Offset(filter_shape, out_channel, filter_y,
- filter_x, in_channel)];
+ float input_value = input_data[in_offset + in_channel];
+ float filter_value = filter_data[filter_offset + in_channel];
total += (input_value * filter_value);
}
}
@@ -176,18 +176,18 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
{
for (int filter_x = 0; filter_x < filter_width; ++filter_x)
{
- for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // If the location is outside the bounds of the input image,
+ // use zero as a default value.
+ const int in_base = Offset(input_shape, batch, in_y, in_x, 0);
+ const int filter_base = Offset(filter_shape, out_channel, filter_y, filter_x, 0);
+ if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
{
- const int in_x = in_x_origin + dilation_width_factor * filter_x;
- const int in_y = in_y_origin + dilation_height_factor * filter_y;
- // If the location is outside the bounds of the input image,
- // use zero as a default value.
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+ for (int in_channel = 0; in_channel < input_depth; in_channel++)
{
- int32_t input_val =
- input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
- int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y,
- filter_x, in_channel)];
+ int32_t input_val = input_data[in_channel + in_base];
+ int32_t filter_val = filter_data[in_channel + filter_base];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
}