diff options
Diffstat (limited to 'libs/kernel/acl/src/cl/Pooling.cpp')
-rw-r--r-- | libs/kernel/acl/src/cl/Pooling.cpp | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/libs/kernel/acl/src/cl/Pooling.cpp b/libs/kernel/acl/src/cl/Pooling.cpp new file mode 100644 index 000000000..e22eacccc --- /dev/null +++ b/libs/kernel/acl/src/cl/Pooling.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> +#include "../IO_accessor.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" + +#include <cassert> + +namespace nnfw { +namespace kernel { +namespace acl { + +bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX, + arm_compute::Size2D(filter_width,filter_height), + pad_info, false); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>(); + pool_f->configure(input.ptr(), output.ptr(), maxpool_info); + + fns.emplace_back(pool_f); + + input.allocate(); + output.allocate(); + + util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t filter_width, int32_t filter_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG, + arm_compute::Size2D(filter_width,filter_height), + pad_info, true); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + + auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>(); + pool_f->configure(input.ptr(), output.ptr(), pool_info); + + fns.emplace_back(pool_f); + + input.allocate(); + output.allocate(); + + util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + + for (const auto &fn : fns) + { + fn->run(); + } + + arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw |