Diffstat (limited to 'contrib/labs/kerneltesting/conv2d/optimized_ops.h')
-rw-r--r--  contrib/labs/kerneltesting/conv2d/optimized_ops.h | 339 ----------------
1 file changed, 0 insertions(+), 339 deletions(-)
diff --git a/contrib/labs/kerneltesting/conv2d/optimized_ops.h b/contrib/labs/kerneltesting/conv2d/optimized_ops.h
deleted file mode 100644
index 1d8c4ff28..000000000
--- a/contrib/labs/kerneltesting/conv2d/optimized_ops.h
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_
-#define ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_
-
-// Make a local MatrixMap typedef that maps a float array as an
-// Eigen matrix expression, const-qualified when Scalar is const.
-template <typename Scalar>
-using MatrixMap = typename std::conditional<
- std::is_const<Scalar>::value,
- Eigen::Map<const Eigen::Matrix<typename std::remove_const<Scalar>::type,
- Eigen::Dynamic, Eigen::Dynamic>>,
- Eigen::Map<Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>>>::type;
-
-template <typename Scalar, int N>
-MatrixMap<Scalar> MapAsMatrixWithFirstDimAsRows(Scalar* data,
- const Dims<N>& dims) {
- const int rows = dims.sizes[0];
- int cols = 1;
- for (int d = 1; d < N; d++) {
- cols *= dims.sizes[d];
- }
- return MatrixMap<Scalar>(data, rows, cols);
-}
-
-template <typename Scalar, int N>
-MatrixMap<Scalar> MapAsMatrixWithLastDimAsCols(Scalar* data,
- const Dims<N>& dims) {
- const int cols = dims.sizes[N - 1];
- int rows = 1;
- for (int d = 0; d < N - 1; d++) {
- rows *= dims.sizes[d];
- }
- return MatrixMap<Scalar>(data, rows, cols);
-}
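-
-// Illustrative sketch, not part of the original header: how the two mapping
-// helpers expose the same packed buffer under different shapes. The function
-// name is hypothetical; it assumes a packed Dims<4> buffer, as used by the
-// kernels below.
-template <typename Scalar>
-void ExampleMapAsMatrix(Scalar* data, const Dims<4>& dims) {
-  // First dim as rows: shape (sizes[0], sizes[1] * sizes[2] * sizes[3]).
-  auto as_rows = MapAsMatrixWithFirstDimAsRows(data, dims);
-  // Last dim as cols: shape (sizes[0] * sizes[1] * sizes[2], sizes[3]).
-  auto as_cols = MapAsMatrixWithLastDimAsCols(data, dims);
-  // Both views cover every element of the same underlying buffer.
-  assert(as_rows.size() == as_cols.size());
-}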
-
-template <typename T>
-inline void ExtractPatchIntoBufferColumn(
- const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
- int stride_width, int stride_height, int pad_width, int pad_height,
- int in_width, int in_height, int in_depth, int single_buffer_length,
- int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) {
- gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn");
- // This chunk of code reshapes all the inputs corresponding to
- // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
- const int kwidth_times_indepth = kwidth * in_depth;
- const int inwidth_times_indepth = in_width * in_depth;
- const int ih_ungated_start = h * stride_height - pad_height;
- const int ih_ungated_end = (ih_ungated_start + kheight);
- const int ih_end = std::min(ih_ungated_end, in_height);
- const int iw_ungated_start = w * stride_width - pad_width;
- const int iw_ungated_end = (iw_ungated_start + kwidth);
- const int iw_end = std::min(iw_ungated_end, in_width);
- // If the patch is off the edge of the input image, skip writing those rows
- // and columns from the patch into the output array.
- const int h_offset = std::max(0, -ih_ungated_start);
- const int w_offset = std::max(0, -iw_ungated_start);
- const int ih_start = std::max(0, ih_ungated_start);
- const int iw_start = std::max(0, iw_ungated_start);
- const int single_row_num =
- std::min(kwidth - w_offset, in_width - iw_start) * in_depth;
- const int output_row_offset = (buffer_id * single_buffer_length);
- int out_offset =
- output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
- int in_offset = Offset(input_dims, 0, iw_start, ih_start, b);
-
- // Express all of the calculations as padding around the input patch.
- const int top_padding = h_offset;
- const int bottom_padding = (ih_ungated_end - ih_end);
- const int left_padding = w_offset;
- const int right_padding = (iw_ungated_end - iw_end);
- assert(single_row_num ==
- ((kwidth - (left_padding + right_padding)) * in_depth));
-
- // Write out zeroes to the elements representing the top rows of the input
- // patch that are off the edge of the input image.
- if (top_padding > 0) {
- const int top_row_elements = (top_padding * kwidth * in_depth);
- memset(conv_buffer_data + output_row_offset, byte_zero,
- (top_row_elements * sizeof(T)));
- }
-
-  // If the patch lies entirely in the interior of the input image
-  // horizontally, just copy the rows over sequentially; otherwise, add zero
-  // padding at the start or end of each row.
- if ((left_padding == 0) && (right_padding == 0)) {
- for (int ih = ih_start; ih < ih_end; ++ih) {
- memcpy(conv_buffer_data + out_offset, in_data + in_offset,
- single_row_num * sizeof(T));
- out_offset += kwidth_times_indepth;
- in_offset += inwidth_times_indepth;
- }
- } else {
- for (int ih = ih_start; ih < ih_end; ++ih) {
- if (left_padding > 0) {
- const int left_start = (out_offset - (left_padding * in_depth));
- memset(conv_buffer_data + left_start, byte_zero,
- (left_padding * in_depth * sizeof(T)));
- }
- memcpy(conv_buffer_data + out_offset, in_data + in_offset,
- single_row_num * sizeof(T));
- if (right_padding > 0) {
- const int right_start = (out_offset + single_row_num);
- memset(conv_buffer_data + right_start, byte_zero,
- (right_padding * in_depth * sizeof(T)));
- }
- out_offset += kwidth_times_indepth;
- in_offset += inwidth_times_indepth;
- }
- }
-
- // If the bottom of the patch falls off the input image, pad the values
- // representing those input rows with zeroes.
- if (bottom_padding > 0) {
- const int bottom_row_elements = (bottom_padding * kwidth * in_depth);
- const int bottom_start =
- output_row_offset +
- ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
- memset(conv_buffer_data + bottom_start, byte_zero,
- (bottom_row_elements * sizeof(T)));
- }
-}
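-
-// Worked example (illustrative, not part of the original header): with
-// in_height = in_width = 5, kheight = kwidth = 3, stride 1 and pad 1, the
-// output pixel (h, w) = (0, 0) yields ih_ungated_start = iw_ungated_start =
-// -1, so h_offset = w_offset = 1 and ih_end = iw_end = 2. One kwidth-wide
-// row of zeroes is written at the top, in_depth zeroes at the left of each
-// copied row, and the remaining 2x2 region is memcpy'd from the input image.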
-
-#ifdef USE_NEON
-template <FusedActivationFunctionType Ac>
-void AddBiasAndEvalActivationFunction(const float* bias_data,
- const Dims<4>& bias_dims,
- float* array_data,
- const Dims<4>& array_dims) {
- gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
- const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3];
- const int array_size = array_dims.sizes[3] * array_dims.strides[3];
- DCHECK_EQ((array_size % bias_size), 0);
- float* array_ptr = array_data;
- float* array_end_ptr = array_ptr + array_size;
- const auto zero = vdupq_n_f32(0);
- const auto six = vdupq_n_f32(6);
- const auto neg_one = vdupq_n_f32(-1);
- const auto one = vdupq_n_f32(1);
- for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
- int i = 0;
- for (; i <= bias_size - 16; i += 16) {
- auto b0 = vld1q_f32(bias_data + i);
- auto b1 = vld1q_f32(bias_data + i + 4);
- auto b2 = vld1q_f32(bias_data + i + 8);
- auto b3 = vld1q_f32(bias_data + i + 12);
- auto a0 = vld1q_f32(array_ptr + i);
- auto a1 = vld1q_f32(array_ptr + i + 4);
- auto a2 = vld1q_f32(array_ptr + i + 8);
- auto a3 = vld1q_f32(array_ptr + i + 12);
- auto x0 = vaddq_f32(a0, b0);
- auto x1 = vaddq_f32(a1, b1);
- auto x2 = vaddq_f32(a2, b2);
- auto x3 = vaddq_f32(a3, b3);
- if (Ac == FusedActivationFunctionType::kRelu ||
- Ac == FusedActivationFunctionType::kRelu6) {
- x0 = vmaxq_f32(zero, x0);
- x1 = vmaxq_f32(zero, x1);
- x2 = vmaxq_f32(zero, x2);
- x3 = vmaxq_f32(zero, x3);
- if (Ac == FusedActivationFunctionType::kRelu6) {
- x0 = vminq_f32(six, x0);
- x1 = vminq_f32(six, x1);
- x2 = vminq_f32(six, x2);
- x3 = vminq_f32(six, x3);
- }
- } else if (Ac == FusedActivationFunctionType::kRelu1) {
- x0 = vmaxq_f32(neg_one, x0);
- x1 = vmaxq_f32(neg_one, x1);
- x2 = vmaxq_f32(neg_one, x2);
- x3 = vmaxq_f32(neg_one, x3);
- x0 = vminq_f32(one, x0);
- x1 = vminq_f32(one, x1);
- x2 = vminq_f32(one, x2);
- x3 = vminq_f32(one, x3);
- }
- vst1q_f32(array_ptr + i, x0);
- vst1q_f32(array_ptr + i + 4, x1);
- vst1q_f32(array_ptr + i + 8, x2);
- vst1q_f32(array_ptr + i + 12, x3);
- }
- for (; i <= bias_size - 4; i += 4) {
- auto b = vld1q_f32(bias_data + i);
- auto a = vld1q_f32(array_ptr + i);
- auto x = vaddq_f32(a, b);
- if (Ac == FusedActivationFunctionType::kRelu ||
- Ac == FusedActivationFunctionType::kRelu6) {
- x = vmaxq_f32(zero, x);
- if (Ac == FusedActivationFunctionType::kRelu6) {
- x = vminq_f32(six, x);
- }
- } else if (Ac == FusedActivationFunctionType::kRelu1) {
- x = vmaxq_f32(neg_one, x);
- x = vminq_f32(one, x);
- }
- vst1q_f32(array_ptr + i, x);
- }
- for (; i < bias_size; i++) {
- array_ptr[i] = ActivationFunction<Ac>(array_ptr[i] + bias_data[i]);
- }
- }
-}
-#else // not NEON
-template <FusedActivationFunctionType Ac>
-void AddBiasAndEvalActivationFunction(const float* bias_data,
- const Dims<4>& bias_dims,
- float* array_data,
- const Dims<4>& array_dims) {
- gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
- const int bias_size = bias_dims.sizes[3] * bias_dims.strides[3];
- const int array_size = array_dims.sizes[3] * array_dims.strides[3];
- DCHECK_EQ((array_size % bias_size), 0);
- for (int array_offset = 0; array_offset < array_size;
- array_offset += bias_size) {
- for (int i = 0; i < bias_size; i++) {
- array_data[array_offset + i] =
- ActivationFunction<Ac>(array_data[array_offset + i] + bias_data[i]);
- }
- }
-}
-#endif
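-
-// Scalar reference (illustrative, not part of the original header): both
-// paths above compute array[i] = Activation(array[i] + bias[i]) over each
-// bias_size-long slice. The function name is hypothetical and assumes
-// std::min/std::max from <algorithm>; it shows one element of the kRelu6
-// case, a clamp to [0, 6].
-inline float ExampleAddBiasRelu6(float x, float b) {
-  return std::min(6.0f, std::max(0.0f, x + b));
-}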
-
-template <typename Lhs, typename Rhs, typename Result>
-void Gemm(const Eigen::MatrixBase<Lhs>& lhs, const Eigen::MatrixBase<Rhs>& rhs,
- Eigen::MatrixBase<Result>* result) {
- if (rhs.cols() == 1) {
- gemmlowp::ScopedProfilingLabel label("GEMV");
- result->col(0).noalias() = lhs * rhs.col(0);
- } else {
- gemmlowp::ScopedProfilingLabel label("GEMM");
- result->noalias() = lhs * rhs;
- }
-}
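-
-// Usage sketch (illustrative, not part of the original header): Gemm picks
-// the GEMV profiling label when the right-hand side has a single column.
-// The function name is hypothetical; it assumes Eigen is available.
-inline void ExampleGemm() {
-  Eigen::MatrixXf lhs = Eigen::MatrixXf::Random(4, 8);
-  Eigen::MatrixXf rhs = Eigen::MatrixXf::Random(8, 1);  // one column -> GEMV
-  Eigen::MatrixXf result(4, 1);
-  Gemm(lhs, rhs, &result);  // result = lhs * rhs, written without aliasing
-}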
-
-template <typename T>
-void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
- int stride_height, int pad_width, int pad_height, int kheight,
- int kwidth, uint8 byte_zero, T* output_data,
- const Dims<4>& output_dims) {
- gemmlowp::ScopedProfilingLabel label("Im2col");
- DCHECK(IsPackedWithoutStrides(input_dims));
- DCHECK(IsPackedWithoutStrides(output_dims));
- const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
- const int input_depth = ArraySize(input_dims, 0);
- const int input_width = ArraySize(input_dims, 1);
- const int input_height = ArraySize(input_dims, 2);
- const int output_depth = ArraySize(output_dims, 0);
- const int output_width = ArraySize(output_dims, 1);
- const int output_height = ArraySize(output_dims, 2);
-
- int buffer_id = 0;
- // Loop over the output nodes.
- for (int b = 0; b < batches; ++b) {
- for (int h = 0; h < output_height; ++h) {
- for (int w = 0; w < output_width; ++w) {
- ExtractPatchIntoBufferColumn(
- input_dims, w, h, b, kheight, kwidth, stride_width, stride_height,
- pad_width, pad_height, input_width, input_height, input_depth,
- output_depth, buffer_id, input_data, output_data, byte_zero);
- ++buffer_id;
- }
- }
- }
-}
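-
-// Shape example (illustrative, not part of the original header): for a
-// batch-1 input of depth 8 and spatial size 10x10 with a 3x3 kernel,
-// stride 1 and no padding, the conv output is 8x8 spatially, so Im2col
-// fills 1 * 8 * 8 = 64 buffer columns, each holding one patch of
-// output_depth = 3 * 3 * 8 = 72 elements.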
-
-template <FusedActivationFunctionType Ac>
-void Conv(const float* input_data, const Dims<4>& input_dims,
- const float* filter_data, const Dims<4>& filter_dims,
- const float* bias_data, const Dims<4>& bias_dims, int stride_width,
- int stride_height, int pad_width, int pad_height, float* output_data,
- const Dims<4>& output_dims, float* im2col_data,
- const Dims<4>& im2col_dims) {
- (void)im2col_data;
- (void)im2col_dims;
- gemmlowp::ScopedProfilingLabel label("Conv");
-
- const float* gemm_input_data = nullptr;
- const Dims<4>* gemm_input_dims = nullptr;
- const int filter_width = ArraySize(filter_dims, 1);
- const int filter_height = ArraySize(filter_dims, 2);
- const bool need_im2col = stride_width != 1 || stride_height != 1 ||
- filter_width != 1 || filter_height != 1;
- if (need_im2col) {
- DCHECK(im2col_data);
- Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
- pad_height, filter_height, filter_width, 0, im2col_data,
- im2col_dims);
- gemm_input_data = im2col_data;
- gemm_input_dims = &im2col_dims;
- } else {
- DCHECK(!im2col_data);
- gemm_input_data = input_data;
- gemm_input_dims = &input_dims;
- }
-
- const auto im2col_matrix_map =
- MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims);
- const auto filter_matrix_map =
- MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
- auto output_matrix_map =
- MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
-
- Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
-
- AddBiasAndEvalActivationFunction<Ac>(bias_data, bias_dims, output_data,
- output_dims);
-}
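-
-// Note (illustrative, not part of the original header): with a 1x1 filter
-// and unit strides, need_im2col is false and the input feeds the GEMM
-// directly, since each output pixel then reads exactly one input depth
-// column; otherwise Conv requires a caller-provided im2col buffer.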
-
-#endif // ANDROID_ML_NN_COMMON_OPERATIONS_INTERNAL_OPTIMIZED_OPS_H_