diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2018-05-04 17:57:16 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2018-05-04 17:57:16 +0900 |
commit | 07659ccd9fe7b1cf1547cc6cad78bcf489f0a361 (patch) | |
tree | cf3a123812b7f1ad8b50d7d0ace891e0c03c6110 /libs/kernel/acl/src/cl/Concatenation.cpp | |
parent | da6f7a3e8360a49fd073a6e0031a4da134d9d984 (diff) | |
download | nnfw-07659ccd9fe7b1cf1547cc6cad78bcf489f0a361.tar.gz nnfw-07659ccd9fe7b1cf1547cc6cad78bcf489f0a361.tar.bz2 nnfw-07659ccd9fe7b1cf1547cc6cad78bcf489f0a361.zip |
Imported Upstream version 0.1upstream/0.1submit/tizen/20180504.091146
Diffstat (limited to 'libs/kernel/acl/src/cl/Concatenation.cpp')
-rw-r--r-- | libs/kernel/acl/src/cl/Concatenation.cpp | 104 |
1 file changed, 104 insertions, 0 deletions
diff --git a/libs/kernel/acl/src/cl/Concatenation.cpp b/libs/kernel/acl/src/cl/Concatenation.cpp new file mode 100644 index 000000000..9376006ca --- /dev/null +++ b/libs/kernel/acl/src/cl/Concatenation.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <cassert> + +// TODO: fix include path in CMakeFiles +#include "../IO_accessor.h" +#include "../shape.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs, + const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + if (axis != 3) + { + assert("Only support axis=3 for ACL" && 0); + return false; + } + assert(inputDataPtrs.size() == inputShapes.size()); + + std::vector<arm_compute::CLTensor*> inputPtrs; + std::vector<arm_compute::ICLTensor*> inputIptrs; + arm_compute::CLTensor output; + + // init Tensors + std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin(); + for (auto inputData : inputDataPtrs) + { + const nnfw::rt::Shape& inputShape = *it_inputShape; + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::CLTensor* inputPtr = new arm_compute::CLTensor(); + + 
inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + inputPtrs.push_back(inputPtr); + inputIptrs.push_back(inputPtr); + + it_inputShape++; + } + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + // prepare ACL Concatenate and configure tensors + auto concat = std::make_shared<arm_compute::CLDepthConcatenateLayer>(); + concat->configure(inputIptrs, &output); + + // allocate Tensors + it_inputShape = inputShapes.begin(); + std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin(); + for (auto inputPtr : inputPtrs) + { + inputPtr->allocator()->allocate(); + + const float* inputData = *it_inputData; + const nnfw::rt::Shape& inputShape = *it_inputShape; + + TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape); + + it_inputShape++; + it_inputData++; + } + output.allocator()->allocate(); + + // run + concat->run(); + arm_compute::CLScheduler::get().sync(); + + // get output + TensorAccess<OutputAccessor>(output, outputData, outputShape); + + // cleanup + for (auto inputPtr : inputPtrs) + { + inputPtr->allocator()->free(); + delete inputPtr; + } + output.allocator()->free(); + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw |