diff options
Diffstat (limited to 'runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc')
-rw-r--r-- | runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc | 165 |
1 file changed, 165 insertions, 0 deletions
diff --git a/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc b/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc new file mode 100644 index 000000000..aa1fd9aed --- /dev/null +++ b/runtimes/neurun/backend/acl_cl/kernel/ConcatLayer.cc @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConcatLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +#include "util/feature/nchw/View.h" +#include "util/logging.h" + +namespace +{ + +inline bool matchSizeExceptAxis(const ::neurun::backend::acl_cl::operand::ICLTensor *t1, + const ::neurun::backend::acl_cl::operand::ICLTensor *t2, + uint32_t axis) +{ + assert(t1->num_dimensions() <= 4); + assert(t2->num_dimensions() <= 4); + + for (uint32_t i = 0; i < 4; i++) + { + if (axis == i) + continue; + if (t1->info()->dimension(i) != t2->info()->dimension(i)) + return false; + } + return true; +} + +} // namespace {anonymous} + +namespace neurun +{ +namespace backend +{ +namespace acl_cl +{ +namespace kernel +{ + +ConcatLayer::ConcatLayer() + : _input_allocs(), _output_alloc(nullptr), _axis(0), _input_type(arm_compute::DataType::F32) +{ + // DO NOTHING +} + +template <typename T> bool ConcatLayer::concatenate() +{ + // Input and output size check + { + // NOTE Support only tensor with dimension 4 or less + + uint32_t axis_sum = 0; + + for (auto input : _input_allocs) + { + 
assert(_output_alloc->ptr()->layout() == input->ptr()->layout()); + assert(matchSizeExceptAxis(_output_alloc->ptr(), input->ptr(), _axis)); + axis_sum += input->ptr()->info()->dimension(_axis); + } + + assert(_output_alloc->ptr()->info()->dimension(_axis) == axis_sum); + } + + VERBOSE(Concat_RUN) << "START Concat" << std::endl; + + // Perform operation + { + uint32_t axis_offset = 0; + + auto outout_fn = [&](::neurun::backend::operand::ITensor &out_tensor) { + for (auto input : _input_allocs) + { + auto &out_cl_tensor = + static_cast<::neurun::backend::acl_cl::operand::ICLTensor &>(out_tensor); + auto input_fn = [&](::neurun::backend::operand::ITensor &in_tensor) { + auto &in_cl_tensor = + static_cast<::neurun::backend::acl_cl::operand::ICLTensor &>(in_tensor); + for (uint32_t i = 0; i < in_cl_tensor.info()->dimension(0); i++) + { + for (uint32_t j = 0; j < in_cl_tensor.info()->dimension(1); j++) + { + for (uint32_t k = 0; k < in_cl_tensor.info()->dimension(2); k++) + { + for (uint32_t l = 0; l < in_cl_tensor.info()->dimension(3); l++) + { + int32_t io = (_axis == 0) ? axis_offset : 0; + int32_t jo = (_axis == 1) ? axis_offset : 0; + int32_t ko = (_axis == 2) ? axis_offset : 0; + int32_t lo = (_axis == 3) ? 
axis_offset : 0; + T value = + *reinterpret_cast<T *>(in_cl_tensor.handle()->ptr_to_element({i, j, k, l})); + *reinterpret_cast<T *>(out_cl_tensor.handle()->ptr_to_element( + {i + io, j + jo, k + ko, l + lo})) = value; + } + } + } + } + if (_axis == 0) + axis_offset += in_cl_tensor.info()->dimension(0); + if (_axis == 1) + axis_offset += in_cl_tensor.info()->dimension(1); + if (_axis == 2) + axis_offset += in_cl_tensor.info()->dimension(2); + if (_axis == 3) + axis_offset += in_cl_tensor.info()->dimension(3); + }; + input->access(input_fn); + } + }; + _output_alloc->access(outout_fn); + } + + VERBOSE(Concat_RUN) << "End Concat" << std::endl; + + return true; +} + +void ConcatLayer::configure( + const std::vector<::neurun::backend::acl_cl::operand::Object *> &input_allocs, int32_t axis, + ::neurun::backend::acl_cl::operand::Object *output_alloc) +{ + _input_allocs = input_allocs; + _output_alloc = output_alloc; + + assert(axis < 4); + + // TODO Handle when axis is negative + assert(axis >= 0); + + _axis = axis; + + _input_type = input_allocs[0]->ptr()->data_type(); +} + +void ConcatLayer::run() +{ + if (_input_type == arm_compute::DataType::F32) + { + concatenate<float>(); + } + else if (_input_type == arm_compute::DataType::QASYMM8) + { + concatenate<uint8_t>(); + } +} + +} // namespace kernel +} // namespace acl_cl +} // namespace backend +} // namespace neurun |