diff options
Diffstat (limited to 'libs/kernel/acl/src/cl/Conv2D.cpp')
-rw-r--r-- | libs/kernel/acl/src/cl/Conv2D.cpp | 113 |
1 file changed, 113 insertions, 0 deletions
diff --git a/libs/kernel/acl/src/cl/Conv2D.cpp b/libs/kernel/acl/src/cl/Conv2D.cpp new file mode 100644 index 000000000..4783bdc1d --- /dev/null +++ b/libs/kernel/acl/src/cl/Conv2D.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <OperationsUtils.h> +#include <NeuralNetworks.h> + +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/TensorInfo.h> + +#include <util/environment.h> + +#include "../IO_accessor.h" +#include "../util.h" +#include "../shape.h" +#include "../CLUniqueTensor.h" +#include "../support.h" + +#include "util/feature/TextFormatter.h" + +#include "support/nnapi/feature/Reader.h" + +namespace nnfw { +namespace kernel { +namespace acl { + +static int verbose = 0; + +bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + const float* filterData, const nnfw::rt::Shape& filterShape, + const float* biasData, const nnfw::rt::Shape& biasShape, + int32_t padding_left, int32_t padding_right, + int32_t padding_top, int32_t padding_bottom, + int32_t stride_width, int32_t stride_height, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape) +{ + arm_compute::TensorShape input_shape = util::fromNNShape(inputShape); + arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape); + arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape); + 
arm_compute::TensorShape output_shape = util::fromNNShape(outputShape); + arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height, + padding_left, padding_right, + padding_top, padding_bottom, + arm_compute::DimensionRoundingType::FLOOR); + + CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32)); + CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32)); + CLUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32)); + CLUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32)); + + std::vector<std::shared_ptr<arm_compute::IFunction>> fns; + + auto conv_f = std::make_shared<arm_compute::CLConvolutionLayer>(); + + conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info); + + fns.emplace_back(conv_f); + + util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns); + + input.allocate(); + output.allocate(); + bias.allocate(); + filter.allocate(); + + TensorAccess<InputAccessor>(input.ref(), inputData, inputShape); + TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape); + TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape); + + nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose); + if (verbose) + { + input.ref().map(); + auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape); + nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData}; + nnfw::support::acl::feature::Reader<float> acl_ifm_reader{input.ptr()}; + + std::cout << "NNAPI IFM:" << std::endl; + std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl; + + std::cout << "ARM Compute IFM:" << std::endl; + std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl; + input.ref().unmap(); + } + + for (const auto &fn : fns) + { + fn->run(); + } + 
+ arm_compute::CLScheduler::get().sync(); + + TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape); + + return true; +} + +} // namespace acl +} // namespace kernel +} // namespace nnfw |