diff options
Diffstat (limited to 'libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp')
-rw-r--r-- | libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp | 294 |
1 files changed, 0 insertions, 294 deletions
diff --git a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp deleted file mode 100644 index 3b5782c25..000000000 --- a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/NEON/NEMath.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared, - const ITensorInfo *output, const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared); - - // Checks performed when output is configured - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, - ITensorInfo *input_squared, - ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); - const unsigned int num_elems_read_per_iteration = - num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); - const unsigned int num_rows = - (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1; - const unsigned int border_width = - (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U); - BorderSize border_size = BorderSize(0, border_width); - bool window_changed = false; - - // Configure window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - AccessWindowRectangle input_access(input, -border_size.left, 0, num_elems_read_per_iteration, - num_rows); - AccessWindowRectangle input_squared_access(input_squared, -border_size.left, 0, - num_elems_read_per_iteration, num_rows); - - if (output->total_size() != 0) - { - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - window_changed = - update_window_and_padding(win, input_access, input_squared_access, output_access); - output_access.set_valid_region(win, input->valid_region()); - } - else - { - window_changed = update_window_and_padding(win, input_access, input_squared_access); - } - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -NENormalizationLayerExKernel::NENormalizationLayerExKernel() - : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr), - _norm_info(NormType::IN_MAP_1D), _border_size() -{ -} - -BorderSize NENormalizationLayerExKernel::border_size() const { return _border_size; } - -void NENormalizationLayerExKernel::configure(const ITensor *input, const ITensor *input_squared, - ITensor *output, NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output); - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), *input->info()); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON( - validate_arguments(input->info(), input_squared->info(), output->info(), norm_info)); - - const unsigned int border_width = - (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U); - - _input = input; - _input_squared = input_squared; - _output = output; - _norm_info = norm_info; - _border_size = BorderSize(0, border_width); - - switch (_input->info()->data_type()) - { - case DataType::F32: - { - switch (norm_info.type()) - { - case NormType::IN_MAP_1D: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, false>; - break; - case NormType::IN_MAP_2D: - // Normalize over X and Y - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, true>; - break; - case NormType::CROSS_MAP: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 2, false>; - break; - default: - break; - } - break; - } - case DataType::F16: - { - switch (norm_info.type()) - { - case NormType::IN_MAP_1D: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, false>; - break; - case NormType::IN_MAP_2D: - // Normalize over X and Y - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, true>; - break; - case NormType::CROSS_MAP: - _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 2, false>; - break; - default: - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("NOT SUPPORTED!"); - } - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), input_squared->info(), - output->info(), norm_info); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - INEKernel::configure(win_config.second); -} - -template <DataType dt, unsigned int dim, bool do_2D_norm> -void NENormalizationLayerExKernel::normalize_float(const Window &window) -{ - Iterator input(_input, window); - Iterator input_squared(_input_squared, window); - Iterator output(_output, window); - - const int dim_y = 1; - const int radius = _norm_info.norm_size(); - const int total_size = _input->info()->dimension(dim) - 1; - const int input_squared_stride = _input_squared->info()->strides_in_bytes()[dim]; - // We account padding across X only and we iterate over rows - const int min_left = (dim == 2) ? 0 : -static_cast<int>(border_size().left); - const int max_right = (dim == 2) ? total_size : total_size + border_size().left; - const int min_top = 0; - const int max_bottom = _input->info()->dimension(dim_y) - 1; - - if (dt == DataType::F32) - { - const float32x4_t coeff_vec = vdupq_n_f32(_norm_info.scale_coeff()); - const float32x4_t beta_vec = vdupq_n_f32(_norm_info.beta()); - const float32x4_t kappa_vec = vdupq_n_f32(_norm_info.kappa()); - - execute_window_loop( - window, - [&](const Coordinates &id) { - // Get range to normalize - const int current_row = do_2D_norm ? id[dim_y] : 0; - const int current_slice = id[dim]; - const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0; - const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0; - const int first_slice = std::max(current_slice - radius, min_left); - const int last_slice = std::min(current_slice + radius, max_right); - - // Accumulate 2D In-Map values - float32x4_t accu = vdupq_n_f32(0.f); - for (int j = first_row; j <= last_row; j++) - { - // Compute row displacement - const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y]; - const uint8_t *const input_squared_ptr = - input_squared.ptr() + row - (current_slice * input_squared_stride); - for (int i = first_slice; i <= last_slice; ++i) - { - accu = vaddq_f32(accu, vld1q_f32(reinterpret_cast<const float *>( - input_squared_ptr + i * input_squared_stride))); - } - } - - // Normalize - const float32x4_t normalized = vpowq_f32(vmlaq_f32(kappa_vec, coeff_vec, accu), beta_vec); - const float32x4_t normalized_pixel = vmulq_f32( - vld1q_f32(reinterpret_cast<const float *>(input.ptr())), vinvq_f32(normalized)); - vst1q_f32(reinterpret_cast<float *>(output.ptr()), normalized_pixel); - }, - input, input_squared, output); - } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - else if (dt == DataType::F16) - { - const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff()); - const float16x8_t beta_vec_f16 = vdupq_n_f16(_norm_info.beta()); - const float16x8_t kappa_vec = vdupq_n_f16(_norm_info.kappa()); - - execute_window_loop( - window, - [&](const Coordinates &id) { - // Get range to normalize - const int current_row = do_2D_norm ? id[dim_y] : 0; - const int current_slice = id[dim]; - const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0; - const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0; - const int first_slice = std::max(current_slice - radius, min_left); - const int last_slice = std::min(current_slice + radius, max_right); - - // Accumulate 2D In-Map values - float16x8_t accu = vdupq_n_f16(0.f); - for (int j = first_row; j <= last_row; j++) - { - // Compute row displacement - const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y]; - const uint8_t *const input_squared_ptr = - input_squared.ptr() + row - (current_slice * input_squared_stride); - for (int i = first_slice; i <= last_slice; ++i) - { - accu = vaddq_f16(accu, vld1q_f16(reinterpret_cast<const float16_t *>( - input_squared_ptr + i * input_squared_stride))); - } - } - - const float16x8_t norm_f16 = - vpowq_f16(vaddq_f16(kappa_vec, vmulq_f16(coeff_vec, accu)), beta_vec_f16); - const float16x8_t normalized_pixel = vmulq_f16( - vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr())), vinvq_f16(norm_f16)); - vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), normalized_pixel); - }, - input, input_squared, output); - } -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - else - { - ARM_COMPUTE_ERROR("Not supported"); - } -} - -Status NENormalizationLayerExKernel::validate(const ITensorInfo *input, - const ITensorInfo *input_squared, - const ITensorInfo *output, - const NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), - input_squared->clone().get(), - output->clone().get(), norm_info) - .first); - - return Status{}; -} - -void NENormalizationLayerExKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - // Run function - (this->*_func)(window); -} |