diff options
Diffstat (limited to 'runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc')
-rw-r--r-- | runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc new file mode 100644 index 000000000..d62a8321b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleDepthToSpace.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + int32_t block_size, const ::arm_compute::Coordinates &axises) +{ + const auto rank = axises.num_dimensions(); + assert(rank == 4); + for (int i = 0; i < rank; ++i) + { + assert(axises[i] >= 0); + assert(axises[i] < rank); + } + + _input = input; + _output = output; + _block_size = block_size; + _axises = axises; +} + +template <typename T> +inline void DepthToSpace(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises) +{ + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = output_shape[axises[3]]; + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + const int in_b = out_b; + const int in_h = out_h / block_size; + const int in_w = out_w / block_size; + const int in_d = + out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; + + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } +} + +void SimpleDepthToSpace::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::F32: + DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} |