summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author장지섭/On-Device Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>2019-09-17 08:23:58 (GMT)
committer이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>2019-09-17 08:23:58 (GMT)
commit4d4f85e1561eaf7dfd9f4126e24b85af24168d55 (patch)
tree2374e26b92116afca22c8976ff5dd0e1c1facf5e
parentaa5f9788dc53e35360cdaea6e219d20d1cf23c5f (diff)
downloadnnfw-4d4f85e1561eaf7dfd9f4126e24b85af24168d55.zip
nnfw-4d4f85e1561eaf7dfd9f4126e24b85af24168d55.tar.gz
nnfw-4d4f85e1561eaf7dfd9f4126e24b85af24168d55.tar.bz2
Make to support ArgMax op for acl neon (#7515)
This commit makes to support ArgMax op for acl neon except int32 type. - Introduce NEArgMinMax layer - Apply NEArgMinMax layer for neurun Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h1
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h81
-rw-r--r--runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp109
-rw-r--r--runtimes/neurun/backend/acl_neon/KernelGenerator.cc45
-rw-r--r--runtimes/neurun/backend/acl_neon/KernelGenerator.h1
-rw-r--r--runtimes/neurun/backend/acl_neon/ShapeFixer.cc2
-rw-r--r--runtimes/neurun/backend/acl_neon/ShapeFixer.h1
-rw-r--r--tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon4
8 files changed, 243 insertions, 1 deletions
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 6eb0830..fb5323d 100644
--- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -16,6 +16,7 @@
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
+#include <arm_compute/runtime/NEON/functions/NEArgMinMax.h>
#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEPReLU.h>
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
new file mode 100644
index 0000000..604cd93
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
+#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to compute the index of the min/max element along a single axis.
+ *
+ * Runs @ref NEArgMinMaxLayer into an intermediate tensor that keeps the reduced axis with size 1,
+ * then uses @ref NEReshapeLayer to drop that axis from the final output.
+ */
+template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
+{
+public:
+ /** Constructor
+ *
+ * @param[in] memory_manager (Optional) Memory manager handed to the internal memory group.
+ */
+ NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Configure the function
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] axis Reduction axis. Negative values are wrapped around the input rank.
+ * @param[out] output Destination tensor receiving the indices; its shape is the input shape with
+ * @p axis removed. Its data type is also used for the intermediate index tensor,
+ * so it must be one @ref NEArgMinMaxLayer can write (U32).
+ */
+ void configure(ITensor *input, int axis, ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of
+ * @ref NEArgMinMaxStatic
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32
+ * @param[in] axis Reduction axis. Negative values are wrapped around the input rank.
+ * @param[in] output Destination tensor info. Must not be nullptr. Its shape must equal the input
+ * shape with @p axis removed; its data type is not checked here.
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ MemoryGroup _memory_group; /**< Manages the intermediate tensor */
+ NEArgMinMaxLayer _reduction_kernel; /**< Arg min/max along the requested axis */
+ Tensor _reduced_out; /**< Intermediate result, reduced axis kept with size 1 */
+ NEReshapeLayer _reshape; /**< Drops the reduced axis to produce the final output */
+};
+
+/** Basic function to run arg max. */
+using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
+/** Basic function to run arg min. */
+using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
new file mode 100644
index 0000000..5ba465b
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+namespace arm_compute
+{
+
+/** Constructs the function; @p memory_manager is moved into the internal memory group. */
+template <ReductionOperation OP>
+NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape()
+{
+}
+
+/** Checks whether the given tensor infos describe a valid arg min/max configuration.
+ *
+ * Verifies the input data type (QASYMM8/F16/F32), that the wrapped axis is at most 3 and addresses
+ * an existing input dimension, and that the output shape equals the input shape with that
+ * dimension removed. The output data type is not checked.
+ *
+ * @param[in] input Source tensor info. Must not be nullptr.
+ * @param[in] axis Reduction axis. Negative values are wrapped around the input rank.
+ * @param[in] output Destination tensor info. Must not be nullptr.
+ *
+ * @return An empty (OK) status on success, an error status otherwise.
+ */
+template <ReductionOperation OP>
+Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
+ const ITensorInfo *output)
+{
+ // output is dereferenced below (clone()), so it must be checked together with input
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
+ DataType::F32);
+
+ TensorShape out_shape = input->tensor_shape();
+ const int input_dims = input->num_dimensions();
+
+ // Convert negative axis
+ const int axis_local = wrap_around(axis, input_dims);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
+ out_shape.remove_dimension(axis_local);
+
+ // Expected output: same info as the given output, but with the reduced shape
+ const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
+
+ return Status{};
+}
+
+/** Configures the arg min/max reduction and the reshape that drops the reduced axis.
+ *
+ * @param[in] input Source tensor. Must not be nullptr.
+ * @param[in] axis Reduction axis. Negative values are wrapped around the input rank.
+ * @param[out] output Destination tensor. Must not be nullptr. Its channel count, data type and
+ * quantization info are reused for the intermediate tensor; its info is
+ * auto-initialised with the reduced shape if it is still empty.
+ */
+template <ReductionOperation OP>
+void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
+{
+ // output->info() is dereferenced below, so it must be checked together with input
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Convert negative axis
+ const int input_dims = input->info()->num_dimensions();
+ const int axis_local = wrap_around(axis, input_dims);
+
+ // Perform reduction for axis: the intermediate tensor keeps the reduced axis with size 1
+ TensorShape intermediate_shape = input->info()->tensor_shape();
+ intermediate_shape.set(axis_local, 1);
+
+ _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
+ output->info()->data_type(),
+ output->info()->quantization_info()));
+ _memory_group.manage(&_reduced_out);
+ _reduction_kernel.configure(input, axis_local, &_reduced_out, OP);
+
+ // Allocate intermediate tensor
+ _reduced_out.allocator()->allocate();
+
+ // Configure reshape layer to drop the reduced dimension
+ TensorShape out_shape = input->info()->tensor_shape();
+ out_shape.remove_dimension(axis_local);
+ auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
+ _reshape.configure(&_reduced_out, output);
+}
+
+/** Runs the arg min/max reduction, then the reshape that writes the final output. */
+template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
+{
+ // Scope object bound to _memory_group for the duration of this call
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Order matters: the reshape reads the intermediate tensor written by the reduction
+ _reduction_kernel.run();
+ _reshape.run();
+}
+
+// Explicit instantiations for the supported operations (member definitions live in this file)
+template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
+template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
+} // namespace arm_compute
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index 32222d1..0293b83 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
@@ -181,6 +181,51 @@ void KernelGenerator::visit(const model::operation::AbsNode &node)
_execution_builder->append(std::move(acl_fn));
}
+/** Generates the NEON ArgMax function for @p node and appends it to the execution builder.
+ *
+ * The axis operand must be a constant of rank 1. A negative axis is normalised against the input
+ * rank and then converted to the ARM Compute axis for the current frontend/backend layouts.
+ */
+void KernelGenerator::visit(const model::operation::ArgMaxNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(model::operation::ArgMaxNode::Input::INPUT)};
+ const auto axis_index{node.param().axis_index};
+
+ assert(_ctx.at(axis_index).isConstant());
+ // Axis rank is always 1.
+ assert(_ctx.at(axis_index).shape().rank() == 1);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
+ auto frontend_layout = _current_subg_layout;
+ auto backend_layout = ifm_alloc->layout();
+
+ // Normalise a negative axis so that it counts from the first dimension
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ assert(axis_value >= 0 && axis_value < ifm_rank);
+ const auto fixed_axis =
+ acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
+
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
+
+ // NOTE The output tensor info is forced to U32 unconditionally before configuring.
+ // NOTE(review): presumably the operand arrives as S32 while the NEON function writes U32
+ // indices - confirm whether this override should only apply to S32 outputs.
+ ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
+ fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _execution_builder->append(std::move(acl_fn));
+}
+
+
void KernelGenerator::visit(const model::operation::Conv2DNode &node)
{
using model::operation::Conv2DNode;
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
index 7b93c4f..28ef565 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
@@ -37,6 +37,7 @@ public:
void visit(const model::Subgraph &) override;
void visit(const model::operation::AbsNode &) override;
+ void visit(const model::operation::ArgMaxNode &) override;
void visit(const model::operation::Conv2DNode &) override;
void visit(const model::operation::DepthwiseConv2DNode &) override;
void visit(const model::operation::MaxPool2DNode &) override;
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
index 1fdb5db..f78b566 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
@@ -61,6 +61,8 @@ ShapeFixer::ShapeFixer(const neurun::model::Operands &ctx,
void ShapeFixer::visit(const model::operation::AbsNode &) { /* DO NOTHING */}
+// ArgMax: no shape fix-up is performed for this operation.
+void ShapeFixer::visit(const model::operation::ArgMaxNode &) { /* DO NOTHING */}
+
void ShapeFixer::visit(const model::operation::Conv2DNode &) { /* DO NOTHING */}
void ShapeFixer::visit(const model::operation::DepthwiseConv2DNode &) { /* DO NOTHING */}
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
index f5c6721..796ea39 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
@@ -38,6 +38,7 @@ public:
std::shared_ptr<ITensorBuilder> tensor_builder() override { return _tensor_builder; }
void visit(const model::operation::AbsNode &) override;
+ void visit(const model::operation::ArgMaxNode &) override;
void visit(const model::operation::Conv2DNode &) override;
void visit(const model::operation::DepthwiseConv2DNode &) override;
void visit(const model::operation::MaxPool2DNode &) override;
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index cc93f97..255ef0b 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -3,7 +3,6 @@
#
# Not support operations
TrivialTest.BroadcastMulTwo
-GeneratedTests.argmax*
GeneratedTests.depth_to_space*
GeneratedTests.dequantize
GeneratedTests.embedding_lookup*
@@ -28,3 +27,6 @@ GeneratedTests.exp_ex_1D_float
GeneratedTests.exp_ex_2D_float
# Unsupported optional input that has shape
GeneratedTests.lstm2*
+# Unsupported data type
+GeneratedTests.argmax_ex_int32
+GeneratedTests.argmax_ex_neg_axis_int32