diff options
Diffstat (limited to 'libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp')
-rw-r--r-- | libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp | 253 |
1 files changed, 0 insertions, 253 deletions
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp deleted file mode 100644 index 48146a43a..000000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" - -using namespace arm_compute; - -CLStridedSliceExKernel::CLStridedSliceExKernel() - : _input(nullptr), _output(nullptr), _beginData(nullptr), _endData(nullptr), - _stridesData(nullptr), _beginMask(0), _endMask(0), _shrinkAxisMask(0) -{ -} - -Status CLStridedSliceExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *begin, const ITensorInfo *end, - const ITensorInfo *strides, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, begin, end, strides); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(begin, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(end, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(strides, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_ERROR_ON(begin->num_dimensions() != 1 || begin->dimension(0) > 4); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(begin->tensor_shape(), end->tensor_shape(), - strides->tensor_shape()); - - return Status{}; -} - -// Return the index for the first element along that axis. This index will be a -// positive integer between [0, axisSize - 1] that can be used to index -// directly into the data. -inline int32_t StartForAxis(int32_t beginMask, int32_t begin, int32_t stride, - const TensorShape &inputShape, int32_t axis) -{ - // Begin with the specified index - int32_t start = begin; - - // beginMask override - if (beginMask & 1 << axis) - { - if (stride > 0) - { - // Forward iteration - use the first element. These values will get - // clamped below (Note: We could have set them to 0 and axisSize-1, but - // use lowest() and max() to maintain symmetry with StopForAxis()) - start = std::numeric_limits<int32_t>::lowest(); - } - else - { - // Backward iteration - use the last element. - start = std::numeric_limits<int32_t>::max(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (start < 0) - { - start += axisSize; - } - - // Clamping - start = arm_compute::utility::clamp(start, 0, axisSize - 1); - - return start; -} - -// Return the "real" index for the end of iteration along that axis. This is an -// "end" in the traditional C sense, in that it points to one past the last -// element. ie. So if you were iterating through all elements of a 1D array of -// size 4, this function would return 4 as the stop, because it is one past the -// "real" indices of 0, 1, 2 & 3. -inline int32_t StopForAxis(int32_t endMask, int32_t end, int32_t stride, - const TensorShape &inputShape, int32_t axis) -{ - // Begin with the specified index - int32_t stop = end; - - // endMask override - if (endMask & (1 << axis)) - { - if (stride > 0) - { - // Forward iteration - use the last element. These values will get - // clamped below - stop = std::numeric_limits<int32_t>::max(); - } - else - { - // Backward iteration - use the first element. - stop = std::numeric_limits<int32_t>::lowest(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (stop < 0) - { - stop += axisSize; - } - - // Clamping - // Because the end index points one past the last element, we need slightly - // different clamping ranges depending on the direction. - if (stride > 0) - { - // Forward iteration - stop = arm_compute::utility::clamp(stop, 0, axisSize); - } - else - { - // Backward iteration - stop = arm_compute::utility::clamp(stop, -1, axisSize - 1); - } - - return stop; -} - -inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride) -{ - int32_t ret = 0; - if (stride > 0) - { - ret = ((stop - start - 1) / stride) + 1; - } - else - { - ret = ((stop - start + 1) / stride) + 1; - } - ARM_COMPUTE_ERROR_ON_MSG(ret < 0, "The dimension must be the natural number"); - return ret; -} - -void CLStridedSliceExKernel::configure(const ICLTensor *input, ICLTensor *output, - ICLTensor *beginData, ICLTensor *endData, - ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), beginData->info(), - endData->info(), stridesData->info(), beginMask, endMask, - shrinkAxisMask)); - - _input = input; - _output = output; - _beginData = beginData; - _endData = endData; - _stridesData = stridesData; - _beginMask = beginMask; - _endMask = endMask; - _shrinkAxisMask = shrinkAxisMask; - - // Set kernel build options - std::set<std::string> build_opts; - build_opts.emplace("-DELEMENT_DATA_TYPE=" + - get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - - // Create kernel - _kernel = static_cast<cl::Kernel>( - CLKernelLibraryEx::get().create_kernel("strided_slice_ex", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - ICLKernel::configure_internal(win); -} - -void CLStridedSliceExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _beginData->map(queue); - _endData->map(queue); - _stridesData->map(queue); - - std::vector<int32_t> starts; - std::vector<int32_t> strides; - - for (uint32_t n = 0; n < _beginData->info()->tensor_shape().total_size(); ++n) - { - const TensorShape shape = _input->info()->tensor_shape(); - starts.emplace_back( - StartForAxis(_beginMask, reinterpret_cast<int32_t *>(_beginData->buffer())[n], - reinterpret_cast<int32_t *>(_stridesData->buffer())[n], shape, n)); - - strides.emplace_back(reinterpret_cast<int32_t *>(_stridesData->buffer())[n]); - } - - for (uint32_t n = _beginData->info()->tensor_shape().total_size(); n < 4; n++) - { - starts.emplace_back(0); - strides.emplace_back(1); - } - // TODO: Apply shrinkAxisMask - - _beginData->unmap(queue); - _stridesData->unmap(queue); - _endData->unmap(queue); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - const cl_int4 startsArg = {{ - static_cast<cl_int>(starts[0]), static_cast<cl_int>(starts[1]), - static_cast<cl_int>(starts[2]), static_cast<cl_int>(starts[3]), - }}; - _kernel.setArg<cl_int4>(idx++, startsArg); - - const cl_int4 stridesArg = {{ - static_cast<cl_int>(strides[0]), static_cast<cl_int>(strides[1]), - static_cast<cl_int>(strides[2]), static_cast<cl_int>(strides[3]), - }}; - _kernel.setArg<cl_int4>(idx++, stridesArg); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_in(slice_out); - slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} |