Diffstat (limited to 'libs/kernel/acl/src/neon/Concatenation.cpp')
-rw-r--r--  libs/kernel/acl/src/neon/Concatenation.cpp  105
1 file changed, 105 insertions(+), 0 deletions(-)
diff --git a/libs/kernel/acl/src/neon/Concatenation.cpp b/libs/kernel/acl/src/neon/Concatenation.cpp
new file mode 100644
index 000000000..8738a9d12
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Concatenation.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+                          const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+                          float* outputData, const nnfw::rt::Shape& outputShape)
+{
+  if (axis != 3)
+  {
+    assert("Only support axis=3 for ACL" && 0);
+    return false;
+  }
+  assert(inputDataPtrs.size() == inputShapes.size());
+
+  std::vector<arm_compute::Tensor*> inputPtrs;
+  std::vector<arm_compute::ITensor*> inputIptrs;
+  arm_compute::Tensor output;
+
+  // init Tensors
+  std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin();
+  for (auto inputData : inputDataPtrs)
+  {
+    const nnfw::rt::Shape& inputShape = *it_inputShape;
+    arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+    arm_compute::Tensor* inputPtr = new arm_compute::Tensor();
+
+    inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+    inputPtrs.push_back(inputPtr);
+    inputIptrs.push_back(inputPtr);
+
+    it_inputShape++;
+  }
+  arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+  output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+  // prepare ACL Concatenate and configure tensors
+  auto concat = std::make_shared<arm_compute::NEDepthConcatenateLayer>();
+  concat->configure(inputIptrs, &output);
+
+  // allocate Tensors
+  it_inputShape = inputShapes.begin();
+  std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin();
+  for (auto inputPtr : inputPtrs)
+  {
+    inputPtr->allocator()->allocate();
+
+    const float* inputData = *it_inputData;
+    const nnfw::rt::Shape& inputShape = *it_inputShape;
+
+    TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape);
+
+    it_inputShape++;
+    it_inputData++;
+  }
+  output.allocator()->allocate();
+
+  // run
+  concat->run();
+
+  // get output
+  TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+  // cleanup
+  for (auto inputPtr : inputPtrs)
+  {
+    inputPtr->allocator()->free();
+    delete inputPtr;
+  }
+  output.allocator()->free();
+
+  return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
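
For context, a minimal caller sketch for the function added in this commit: it concatenates two NHWC float tensors along the channel axis (axis = 3), which is the only axis this kernel accepts. This sketch is not part of the commit; the assumption that nnfw::rt::Shape exposes a `dimensions` member (as in the Android NN `OperationsUtils.h` Shape it mirrors) is illustrative and may differ from the actual runtime headers.

```cpp
// Hypothetical usage sketch (not part of this diff).
#include <vector>
// Headers providing nnfw::rt::Shape and concatenationFloat32 are assumed to be
// on the include path, as in the kernel sources above.

void concat_example()
{
  // Two 1x2x2x2 inputs, one 1x2x2x4 output (channels summed along axis 3).
  std::vector<float> in0(1 * 2 * 2 * 2, 1.0f);
  std::vector<float> in1(1 * 2 * 2 * 2, 2.0f);
  std::vector<float> out(1 * 2 * 2 * 4, 0.0f);

  // Assumption: Shape carries NHWC dimensions in a `dimensions` field.
  nnfw::rt::Shape inShape;
  inShape.dimensions = {1, 2, 2, 2};
  nnfw::rt::Shape outShape;
  outShape.dimensions = {1, 2, 2, 4};

  std::vector<const float*> inputs{in0.data(), in1.data()};
  std::vector<nnfw::rt::Shape> shapes{inShape, inShape};

  // axis = 3 is required; any other value makes the kernel return false.
  bool ok = nnfw::kernel::acl::neon::concatenationFloat32(inputs, shapes, 3,
                                                          out.data(), outShape);
  (void)ok;
}
```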