/* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include // TODO: fix include path in CMakeFiles #include "../IO_accessor.h" #include "../shape.h" namespace nnfw { namespace kernel { namespace acl { bool concatenationFloat32(const std::vector& inputDataPtrs, const std::vector& inputShapes, int32_t axis, float* outputData, const nnfw::rt::Shape& outputShape) { if (axis != 3) { assert("Only support axis=3 for ACL" && 0); return false; } assert(inputDataPtrs.size() == inputShapes.size()); std::vector inputPtrs; std::vector inputIptrs; arm_compute::CLTensor output; // init Tensors std::vector::const_iterator it_inputShape = inputShapes.begin(); for (auto inputData : inputDataPtrs) { const nnfw::rt::Shape& inputShape = *it_inputShape; arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); arm_compute::CLTensor* inputPtr = new arm_compute::CLTensor(); inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); inputPtrs.push_back(inputPtr); inputIptrs.push_back(inputPtr); it_inputShape++; } arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); // prepare ACL Concatenate and configure tensors auto concat = std::make_shared(); concat->configure(inputIptrs, &output); // allocate Tensors it_inputShape = inputShapes.begin(); std::vector::const_iterator it_inputData = inputDataPtrs.begin(); for (auto inputPtr : inputPtrs) { inputPtr->allocator()->allocate(); const float* inputData = *it_inputData; const nnfw::rt::Shape& inputShape = *it_inputShape; TensorAccess(*inputPtr, inputData, inputShape); it_inputShape++; it_inputData++; } output.allocator()->allocate(); // run concat->run(); arm_compute::CLScheduler::get().sync(); // get output TensorAccess(output, outputData, outputShape); // cleanup for (auto inputPtr : inputPtrs) { inputPtr->allocator()->free(); delete inputPtr; } output.allocator()->free(); return true; } } // namespace acl } // namespace kernel } // namespace nnfw