summaryrefslogtreecommitdiff
path: root/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp')
-rw-r--r--libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp294
1 files changed, 0 insertions, 294 deletions
diff --git a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp
deleted file mode 100644
index 3b5782c25..000000000
--- a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEMath.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared,
- const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
-
- // Checks performed when output is configured
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input,
- ITensorInfo *input_squared,
- ITensorInfo *output,
- const NormalizationLayerInfo &norm_info)
-{
- unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
- const unsigned int num_elems_read_per_iteration =
- num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
- const unsigned int num_rows =
- (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1;
- const unsigned int border_width =
- (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
- BorderSize border_size = BorderSize(0, border_width);
- bool window_changed = false;
-
- // Configure window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
- AccessWindowRectangle input_access(input, -border_size.left, 0, num_elems_read_per_iteration,
- num_rows);
- AccessWindowRectangle input_squared_access(input_squared, -border_size.left, 0,
- num_elems_read_per_iteration, num_rows);
-
- if (output->total_size() != 0)
- {
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed =
- update_window_and_padding(win, input_access, input_squared_access, output_access);
- output_access.set_valid_region(win, input->valid_region());
- }
- else
- {
- window_changed = update_window_and_padding(win, input_access, input_squared_access);
- }
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-NENormalizationLayerExKernel::NENormalizationLayerExKernel()
- : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr),
- _norm_info(NormType::IN_MAP_1D), _border_size()
-{
-}
-
-BorderSize NENormalizationLayerExKernel::border_size() const { return _border_size; }
-
-void NENormalizationLayerExKernel::configure(const ITensor *input, const ITensor *input_squared,
- ITensor *output, NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output);
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info());
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), input_squared->info(), output->info(), norm_info));
-
- const unsigned int border_width =
- (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
-
- _input = input;
- _input_squared = input_squared;
- _output = output;
- _norm_info = norm_info;
- _border_size = BorderSize(0, border_width);
-
- switch (_input->info()->data_type())
- {
- case DataType::F32:
- {
- switch (norm_info.type())
- {
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, true>;
- break;
- case NormType::CROSS_MAP:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 2, false>;
- break;
- default:
- break;
- }
- break;
- }
- case DataType::F16:
- {
- switch (norm_info.type())
- {
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, true>;
- break;
- case NormType::CROSS_MAP:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 2, false>;
- break;
- default:
- break;
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("NOT SUPPORTED!");
- }
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), input_squared->info(),
- output->info(), norm_info);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- INEKernel::configure(win_config.second);
-}
-
-template <DataType dt, unsigned int dim, bool do_2D_norm>
-void NENormalizationLayerExKernel::normalize_float(const Window &window)
-{
- Iterator input(_input, window);
- Iterator input_squared(_input_squared, window);
- Iterator output(_output, window);
-
- const int dim_y = 1;
- const int radius = _norm_info.norm_size();
- const int total_size = _input->info()->dimension(dim) - 1;
- const int input_squared_stride = _input_squared->info()->strides_in_bytes()[dim];
- // We account padding across X only and we iterate over rows
- const int min_left = (dim == 2) ? 0 : -static_cast<int>(border_size().left);
- const int max_right = (dim == 2) ? total_size : total_size + border_size().left;
- const int min_top = 0;
- const int max_bottom = _input->info()->dimension(dim_y) - 1;
-
- if (dt == DataType::F32)
- {
- const float32x4_t coeff_vec = vdupq_n_f32(_norm_info.scale_coeff());
- const float32x4_t beta_vec = vdupq_n_f32(_norm_info.beta());
- const float32x4_t kappa_vec = vdupq_n_f32(_norm_info.kappa());
-
- execute_window_loop(
- window,
- [&](const Coordinates &id) {
- // Get range to normalize
- const int current_row = do_2D_norm ? id[dim_y] : 0;
- const int current_slice = id[dim];
- const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0;
- const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
- const int first_slice = std::max(current_slice - radius, min_left);
- const int last_slice = std::min(current_slice + radius, max_right);
-
- // Accumulate 2D In-Map values
- float32x4_t accu = vdupq_n_f32(0.f);
- for (int j = first_row; j <= last_row; j++)
- {
- // Compute row displacement
- const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y];
- const uint8_t *const input_squared_ptr =
- input_squared.ptr() + row - (current_slice * input_squared_stride);
- for (int i = first_slice; i <= last_slice; ++i)
- {
- accu = vaddq_f32(accu, vld1q_f32(reinterpret_cast<const float *>(
- input_squared_ptr + i * input_squared_stride)));
- }
- }
-
- // Normalize
- const float32x4_t normalized = vpowq_f32(vmlaq_f32(kappa_vec, coeff_vec, accu), beta_vec);
- const float32x4_t normalized_pixel = vmulq_f32(
- vld1q_f32(reinterpret_cast<const float *>(input.ptr())), vinvq_f32(normalized));
- vst1q_f32(reinterpret_cast<float *>(output.ptr()), normalized_pixel);
- },
- input, input_squared, output);
- }
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- else if (dt == DataType::F16)
- {
- const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff());
- const float16x8_t beta_vec_f16 = vdupq_n_f16(_norm_info.beta());
- const float16x8_t kappa_vec = vdupq_n_f16(_norm_info.kappa());
-
- execute_window_loop(
- window,
- [&](const Coordinates &id) {
- // Get range to normalize
- const int current_row = do_2D_norm ? id[dim_y] : 0;
- const int current_slice = id[dim];
- const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0;
- const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
- const int first_slice = std::max(current_slice - radius, min_left);
- const int last_slice = std::min(current_slice + radius, max_right);
-
- // Accumulate 2D In-Map values
- float16x8_t accu = vdupq_n_f16(0.f);
- for (int j = first_row; j <= last_row; j++)
- {
- // Compute row displacement
- const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y];
- const uint8_t *const input_squared_ptr =
- input_squared.ptr() + row - (current_slice * input_squared_stride);
- for (int i = first_slice; i <= last_slice; ++i)
- {
- accu = vaddq_f16(accu, vld1q_f16(reinterpret_cast<const float16_t *>(
- input_squared_ptr + i * input_squared_stride)));
- }
- }
-
- const float16x8_t norm_f16 =
- vpowq_f16(vaddq_f16(kappa_vec, vmulq_f16(coeff_vec, accu)), beta_vec_f16);
- const float16x8_t normalized_pixel = vmulq_f16(
- vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr())), vinvq_f16(norm_f16));
- vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), normalized_pixel);
- },
- input, input_squared, output);
- }
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- else
- {
- ARM_COMPUTE_ERROR("Not supported");
- }
-}
-
-Status NENormalizationLayerExKernel::validate(const ITensorInfo *input,
- const ITensorInfo *input_squared,
- const ITensorInfo *output,
- const NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
- input_squared->clone().get(),
- output->clone().get(), norm_info)
- .first);
-
- return Status{};
-}
-
-void NENormalizationLayerExKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
- // Run function
- (this->*_func)(window);
-}