diff options
Diffstat (limited to 'runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc')
-rw-r--r-- | runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc new file mode 100644 index 000000000..abc291289 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleTransposeConv.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleTransposeConv::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, + ::arm_compute::ITensor *output, + ::arm_compute::PadStrideInfo &tconv_info, + ::arm_compute::Coordinates axises) +{ + auto rank = axises.num_dimensions(); + + assert(rank == 4); + + _input = input; + _weights = weights; + _output = output; + _stride_width = tconv_info.stride().first; + _stride_height = tconv_info.stride().second; + _pad_width = tconv_info.pad_left(); + _pad_height = tconv_info.pad_top(); + _axises = axises; +} + +template <typename T> +inline void ApplyTransposeConv( + const ::arm_compute::TensorShape &input_shape, const ::arm_compute::ITensor *input_data, + const ::arm_compute::TensorShape &filter_shape, const ::arm_compute::ITensor *filter_data, + const ::arm_compute::TensorShape &output_shape, const ::arm_compute::ITensor *output_data, + const int32_t stride_width, const int32_t stride_height, const int32_t pad_width, + const int32_t pad_height, const ::arm_compute::Coordinates axises) +{ + const int batches = input_shape[axises[0]]; + const int input_height = input_shape[axises[1]]; + const int input_width = input_shape[axises[2]]; + const int input_depth = input_shape[axises[3]]; + + const int filter_height = filter_shape[axises[1]]; + const int filter_width = filter_shape[axises[2]]; + + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = output_shape[axises[3]]; + + assert(batches == output_shape[axises[0]]); + assert(input_depth == filter_shape[axises[3]]); + assert(filter_shape[axises[0]] == output_depth); + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) + { + for (int in_y = 0; in_y < input_height; ++in_y) + { + for (int in_x = 0; in_x < input_width; ++in_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) + { + auto input_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{batch, in_y, in_x, in_channel}, axises); + auto filter_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{in_channel, filter_y, filter_x, out_channel}, + axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{batch, out_y, out_x, out_channel}, axises); + T input_value = *reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); + T filter_value = *reinterpret_cast<T *>(filter_data->ptr_to_element(filter_id)); + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) += + input_value * filter_value; + } + } + } + } + } + } + } + } +} + +void SimpleTransposeConv::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_weights)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::S32: + ApplyTransposeConv<int32_t>(_input->info()->tensor_shape(), _input, + _weights->info()->tensor_shape(), _weights, + _output->info()->tensor_shape(), _output, _stride_width, + _stride_height, _pad_width, _pad_height, _axises); + break; + case ::arm_compute::DataType::F32: + ApplyTransposeConv<float>(_input->info()->tensor_shape(), _input, + _weights->info()->tensor_shape(), _weights, + _output->info()->tensor_shape(), _output, _stride_width, + _stride_height, _pad_width, _pad_height, _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_weights)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} |