summaryrefslogtreecommitdiff
path: root/runtimes
diff options
context:
space:
mode:
author장지섭/On-Device Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>2019-09-05 20:15:52 +0900
committer오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>2019-09-05 20:15:52 +0900
commit9ff6af3ba6353c58a0f09217bacd81b1ef3ecc24 (patch)
tree73e75bd9e5ceef861d64c845fb2f02976cfdc185 /runtimes
parent0c1c65bcc14a0e960760e58ab5b407e692a94ac6 (diff)
downloadnnfw-9ff6af3ba6353c58a0f09217bacd81b1ef3ecc24.tar.gz
nnfw-9ff6af3ba6353c58a0f09217bacd81b1ef3ecc24.tar.bz2
nnfw-9ff6af3ba6353c58a0f09217bacd81b1ef3ecc24.zip
Enable binary Logical ops for ACL neon (#7233)
This commit enables to support binary Logical ops for ACL neon. - Implement kernel of acl_neon for binary logical ops - Enable to support these ops for acl_neon on the neurun Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
Diffstat (limited to 'runtimes')
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h70
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h1
-rw-r--r--runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h98
-rw-r--r--runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp326
-rw-r--r--runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp70
-rw-r--r--runtimes/neurun/backend/acl_neon/KernelGenerator.cc46
-rw-r--r--runtimes/neurun/backend/acl_neon/KernelGenerator.h2
-rw-r--r--runtimes/neurun/backend/acl_neon/ShapeFixer.cc34
-rw-r--r--runtimes/neurun/backend/acl_neon/ShapeFixer.h2
9 files changed, 649 insertions, 0 deletions
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h b/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
new file mode 100644
index 000000000..61992bd50
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
+#define __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__
+
+#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "arm_compute/core/TypesEx.h"
+
+namespace arm_compute
+{
+
+class NEBinaryLogicalOperationKernel : public NEElementwiseOperationKernel
+{
+public:
+ /** Default destructor */
+ ~NEBinaryLogicalOperationKernel() = default;
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] op Binary logical operation to be executed.
+ * @param[in] input1 First tensor input. Data types supported: QASYMM8/U8.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(BinaryLogicalOperation op, const ITensor *input1, const ITensor *input2,
+ ITensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] op Binary logical operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a Status
+ */
+ static Status validate(BinaryLogicalOperation op, const ITensorInfo *input1,
+ const ITensorInfo *input2, const ITensorInfo *output);
+
+protected:
+ // Inherited methods overridden:
+ static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
+ const ITensorInfo &output);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATIONKERNEL_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 5711a3b00..6cdce9ed0 100644
--- a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
@@ -16,6 +16,7 @@
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
+#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEReduceMeanEx.h>
#include <arm_compute/runtime/NEON/functions/NERNNLayerEx.h>
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
new file mode 100644
index 000000000..2a624656d
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+#define __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__
+
+#include "arm_compute/core/TypesEx.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEBinaryLogicalOperationKernel.
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8.
+ * @note The function performs a binary logical operation between two tensors.
+ */
+class NEBinaryLogicalOperation : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8.
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ * @param[in] op Binary Logical Operation to be performed.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output, BinaryLogicalOperation op);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] op Binary Logical Operation to be performed.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, BinaryLogicalOperation op);
+};
+
+/** Basic function to run @ref NEBinaryLogicalOperationKernel
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/U8.
+ * @note The function performs a binary logical operation between two tensors.
+ */
+template <BinaryLogicalOperation op> class NEBinaryLogicalOperationStatic : public INESimpleFunction
+{
+public:
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/U8
+ * @param[in, out] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(ITensor *input1, ITensor *input2, ITensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * NEBinaryLogicalOperationKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: QASYMM8/U8
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output);
+};
+
+/** Basic function to run equal comparison. */
+using NELogicalAnd = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>;
+/** Basic function to run not equal comparison. */
+using NELogicalOr = NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>;
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEBINARYLOGICALOPERATION_H__ */
diff --git a/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
new file mode 100644
index 000000000..b6cf7c93f
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+
+#include <algorithm>
+#include <arm_neon.h>
+#include <map>
+#include <string>
+
+namespace arm_compute
+{
+class Coordinates;
+} // namespace arm_compute
+
+namespace arm_compute
+{
+
+template <BinaryLogicalOperation op, typename ScalarType>
+inline ScalarType elementwise_logic_op_scalar(const ScalarType &a, const ScalarType &b)
+{
+ auto res = ScalarType(0);
+
+ switch (op)
+ {
+ case BinaryLogicalOperation::AND:
+ res = a & b;
+ break;
+ case BinaryLogicalOperation::OR:
+ res = a | b;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+ return res;
+}
+
+template <BinaryLogicalOperation op, typename VectorType>
+inline VectorType elementwise_logic_op(const VectorType &a, const VectorType &b)
+{
+ VectorType res = {0, 0, 0, 0};
+
+ switch (op)
+ {
+ case BinaryLogicalOperation::AND:
+ res = wrapper::vand(a, b);
+ break;
+ case BinaryLogicalOperation::OR:
+ res = wrapper::vorr(a, b);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+ return res;
+}
+
+template <BinaryLogicalOperation op>
+inline uint8x16x4_t elementwise_logic_op(const uint8x16x4_t &a, const uint8x16x4_t &b)
+{
+ uint8x16x4_t out = {{
+ elementwise_logic_op<op>(a.val[0], b.val[0]), elementwise_logic_op<op>(a.val[1], b.val[1]),
+ elementwise_logic_op<op>(a.val[2], b.val[2]), elementwise_logic_op<op>(a.val[3], b.val[3]),
+ }};
+ return out;
+}
+
+template <BinaryLogicalOperation op, typename ScalarType, typename VectorType>
+inline VectorType elementwise_logic_op_broadcast(const VectorType &a,
+ const ScalarType &broadcast_value,
+ const bool reorder)
+{
+ VectorType broadcast_vector = wrapper::vdup_n(broadcast_value, wrapper::traits::vector_128_tag());
+ return elementwise_logic_op<op>(reorder ? broadcast_vector : a, reorder ? a : broadcast_vector);
+}
+
+template <BinaryLogicalOperation op, typename ScalarType, typename VectorType>
+inline int elementwise_logic_op_loop(int window_start_x, int window_end_x, int window_step_x,
+ const ScalarType *input1_ptr, const ScalarType *input2_ptr,
+ ScalarType *output_ptr)
+{
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto a = wrapper::vloadq(input1_ptr + x);
+ const auto b = wrapper::vloadq(input2_ptr + x);
+ wrapper::vstore(output_ptr + x, elementwise_logic_op<op>(a, b));
+ }
+ return x;
+}
+
+template <BinaryLogicalOperation op, typename ScalarType, typename VectorType>
+inline int elementwise_logic_op_broadcast_loop(int window_start_x, int window_end_x,
+ int window_step_x,
+ const ScalarType *non_broadcast_input_ptr,
+ const ScalarType &broadcast_value,
+ ScalarType *output_ptr, const bool reorder)
+{
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto a = wrapper::vloadq((non_broadcast_input_ptr + x));
+ wrapper::vstore(output_ptr + x,
+ elementwise_logic_op_broadcast<op>(a, broadcast_value, reorder));
+ }
+ return x;
+}
+
+template <typename InputScalarType, typename OutputScalarType, typename InputVectorType>
+void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window,
+ OutputScalarType (*scalar_func)(const InputScalarType &,
+ const InputScalarType &),
+ int (*broadcast_func)(int, int, int, const InputScalarType *,
+ const InputScalarType &, OutputScalarType *, const bool),
+ int (*neon_func)(int, int, int, const InputScalarType *,
+ const InputScalarType *, OutputScalarType *))
+{
+ // Create input windows
+ Window input1_win = window.broadcast_if_dimension_le_one(in1->info()->tensor_shape());
+ Window input2_win = window.broadcast_if_dimension_le_one(in2->info()->tensor_shape());
+
+ // Clear X Dimension on execution window as we handle manually
+ Window win = window;
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ const int window_step_x = std::min(16 / static_cast<int>(sizeof(OutputScalarType)), 8);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+
+ if (is_broadcast_across_x)
+ {
+ const bool is_broadcast_input_2 = input2_win.x().step() == 0;
+ Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win;
+ Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win;
+ const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
+ const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
+
+ // Clear X Dimension on execution window as we handle manually
+ non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator broadcast_input(broadcast_tensor, broadcast_win);
+ Iterator non_broadcast_input(non_broadcast_tensor, non_broadcast_win);
+ Iterator output(out, win);
+
+ execute_window_loop(win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto non_broadcast_input_ptr =
+ reinterpret_cast<const InputScalarType *>(non_broadcast_input.ptr());
+ const InputScalarType broadcast_value =
+ *reinterpret_cast<const InputScalarType *>(broadcast_input.ptr());
+
+ int x = (*broadcast_func)(window_start_x, window_end_x, window_step_x,
+ non_broadcast_input_ptr, broadcast_value,
+ output_ptr, !is_broadcast_input_2);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(non_broadcast_input_ptr + x);
+ *(output_ptr + x) =
+ (*scalar_func)(!is_broadcast_input_2 ? broadcast_value : a,
+ !is_broadcast_input_2 ? a : broadcast_value);
+ }
+ },
+ broadcast_input, non_broadcast_input, output);
+ }
+ else
+ {
+ // Clear X Dimension on execution window as we handle manually
+ input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
+ input2_win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input1(in1, input1_win);
+ Iterator input2(in2, input2_win);
+ Iterator output(out, win);
+
+ execute_window_loop(win,
+ [&](const Coordinates &) {
+ auto output_ptr = reinterpret_cast<OutputScalarType *>(output.ptr());
+ const auto input1_ptr =
+ reinterpret_cast<const InputScalarType *>(input1.ptr());
+ const auto input2_ptr =
+ reinterpret_cast<const InputScalarType *>(input2.ptr());
+
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
+ input1_ptr, input2_ptr, output_ptr);
+ for (; x < window_end_x; ++x)
+ {
+ const auto a = *(input1_ptr + x);
+ const auto b = *(input2_ptr + x);
+ *(output_ptr + x) = (*scalar_func)(a, b);
+ }
+ },
+ input1, input2, output);
+ }
+}
+
+template <BinaryLogicalOperation op, typename ScalarType, typename VectorType>
+void elementwise_logic_op(const ITensor *in1, const ITensor *in2, ITensor *out,
+ const Window &window)
+{
+ elementwise_op<ScalarType, ScalarType, VectorType>(
+ in1, in2, out, window, &elementwise_logic_op_scalar<op, ScalarType>,
+ &elementwise_logic_op_broadcast_loop<op, ScalarType, VectorType>,
+ &elementwise_logic_op_loop<op, ScalarType, VectorType>);
+}
+
+std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)> configure_func(
+ const ITensor *input1, const ITensor *input2, ITensor *output,
+ std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function)
+{
+ std::string function_to_call("op_");
+ function_to_call += string_from_data_type(input1->info()->data_type()) + "_";
+ function_to_call += string_from_data_type(input2->info()->data_type()) + "_";
+ function_to_call += string_from_data_type(output->info()->data_type());
+
+ auto it = map_function.find(function_to_call);
+
+ if (it != map_function.end())
+ {
+ auto func = it->second;
+ return [func](const ITensor *input1, const ITensor *input2, ITensor *output,
+ const Window &window) { func(input1, input2, output, window); };
+ }
+ return nullptr;
+}
+
+template <BinaryLogicalOperation op>
+std::function<void(const ITensor *, const ITensor *, ITensor *, const Window &)>
+configure_logic_func(const ITensor *input1, const ITensor *input2, ITensor *output)
+{
+ static std::map<std::string, NEElementwiseOperationKernel::ElementwiseFunction *> map_function = {
+ {"op_U8_U8_U8", &elementwise_logic_op<op, uint8_t, uint8x16_t>},
+ {"op_QASYMM8_QASYMM8_QASYMM8", &elementwise_logic_op<op, uint8_t, uint8x16_t>}};
+
+ return configure_func(input1, input2, output, map_function);
+}
+
+void NEBinaryLogicalOperationKernel::configure(BinaryLogicalOperation op, const ITensor *input1,
+ const ITensor *input2, ITensor *output)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info()));
+ configure_common(input1, input2, output);
+ switch (op)
+ {
+ case BinaryLogicalOperation::AND:
+ _function = configure_logic_func<BinaryLogicalOperation::AND>(input1, input2, output);
+ break;
+ case BinaryLogicalOperation::OR:
+ _function = configure_logic_func<BinaryLogicalOperation::OR>(input1, input2, output);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+}
+
+Status NEBinaryLogicalOperationKernel::validate_arguments(const ITensorInfo &input1,
+ const ITensorInfo &input2,
+ const ITensorInfo &output)
+{
+ // Validate in case of configured output
+ if (output.total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8,
+ DataType::QASYMM8);
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
+
+ const TensorShape out_shape =
+ TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
+ "Inputs are not broadcast compatible");
+
+ // Validate in case of configured output
+ if (output.total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
+ "Wrong shape for output");
+ }
+
+ return Status{};
+}
+
+Status NEBinaryLogicalOperationKernel::validate(BinaryLogicalOperation op,
+ const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_UNUSED(op);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
+ return Status{};
+}
+
+} // namespace arm_compute
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
new file mode 100644
index 000000000..7c15fc453
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2018-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
+#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
+
+#include "arm_compute/core/ITensor.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+
+template <BinaryLogicalOperation COP>
+void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
+ ITensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ k->configure(COP, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+template <BinaryLogicalOperation COP>
+Status NEBinaryLogicalOperationStatic<COP>::validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output)
+{
+ return NEBinaryLogicalOperationKernel::validate(COP, input1, input2, output);
+}
+
+void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
+ BinaryLogicalOperation op)
+{
+ auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ k->configure(op, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+Status NEBinaryLogicalOperation::validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, BinaryLogicalOperation op)
+{
+ return NEBinaryLogicalOperationKernel::validate(op, input1, input2, output);
+}
+
+// Supported Specializations
+template class NEBinaryLogicalOperationStatic<BinaryLogicalOperation::AND>;
+template class NEBinaryLogicalOperationStatic<BinaryLogicalOperation::OR>;
+} // namespace arm_compute
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index 836b99ceb..f5e71e448 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
@@ -690,6 +690,29 @@ void KernelGenerator::visit(const model::operation::LocalResponseNormalizationNo
_execution_builder->append(std::move(acl_fn));
}
+void KernelGenerator::visit(const model::operation::LogicalAndNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT1)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ auto l = nnfw::cpp14::make_unique<::arm_compute::NELogicalAnd>();
+
+ l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+
+ fn = std::move(l);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _execution_builder->append(std::move(acl_fn));
+}
+
void KernelGenerator::visit(const model::operation::LogicalNotNode &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -711,6 +734,29 @@ void KernelGenerator::visit(const model::operation::LogicalNotNode &node)
_execution_builder->append(std::move(acl_fn));
}
+void KernelGenerator::visit(const model::operation::LogicalOrNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input0_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT1)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input0_alloc = _tensor_builder->at(input0_index).get();
+ auto input1_alloc = _tensor_builder->at(input1_index).get();
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ auto l = nnfw::cpp14::make_unique<::arm_compute::NELogicalOr>();
+
+ l->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+
+ fn = std::move(l);
+
+ auto acl_fn = asAclFunction(std::move(fn));
+
+ _execution_builder->append(std::move(acl_fn));
+}
+
void KernelGenerator::visit(const model::operation::LogisticNode &node)
{
const auto ofm_index{node.getOutputs().at(0)};
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
index a3cc5c28e..00284d3f3 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
@@ -48,7 +48,9 @@ public:
void visit(const model::operation::L2NormalizationNode &) override;
void visit(const model::operation::L2Pool2DNode &) override;
void visit(const model::operation::LocalResponseNormalizationNode &) override;
+ void visit(const model::operation::LogicalAndNode &) override;
void visit(const model::operation::LogicalNotNode &) override;
+ void visit(const model::operation::LogicalOrNode &) override;
void visit(const model::operation::LogisticNode &) override;
void visit(const model::operation::LSTMNode &) override;
void visit(const model::operation::MulNode &) override;
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
index e155cf231..7b0b3fdf3 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
@@ -102,8 +102,42 @@ void ShapeFixer::visit(const model::operation::L2Pool2DNode &) { /* DO NOTHING *
void ShapeFixer::visit(const model::operation::LocalResponseNormalizationNode &) { /* DO NOTHING */}
+void ShapeFixer::visit(const model::operation::LogicalAndNode &node)
+{
+ const auto input0_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(model::operation::LogicalAndNode::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<::neurun::model::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<::neurun::model::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
void ShapeFixer::visit(const model::operation::LogicalNotNode &) { /* DO NOTHING */}
+void ShapeFixer::visit(const model::operation::LogicalOrNode &node)
+{
+ const auto input0_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT0)};
+ const auto input1_index{node.getInputs().at(model::operation::LogicalOrNode::Input::INPUT1)};
+
+ if (!(_ctx.at(input0_index).shape() == _ctx.at(input1_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input0_index).shape().rank(), _ctx.at(input1_index).shape().rank());
+
+ // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
+ // a node to extend shape may be inserted in front of this operation
+ const_cast<::neurun::model::Shape &>(_ctx.at(input0_index).shape()).extendRank(broadcast_rank);
+ const_cast<::neurun::model::Shape &>(_ctx.at(input1_index).shape()).extendRank(broadcast_rank);
+ }
+}
+
void ShapeFixer::visit(const model::operation::LogisticNode &) { /* DO NOTHING */}
void ShapeFixer::visit(const model::operation::LSTMNode &) { /* DO NOTHING */}
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
index 3baff3dbb..161f5a85c 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
@@ -50,7 +50,9 @@ public:
void visit(const model::operation::L2NormalizationNode &) override;
void visit(const model::operation::L2Pool2DNode &) override;
void visit(const model::operation::LocalResponseNormalizationNode &) override;
+ void visit(const model::operation::LogicalAndNode &) override;
void visit(const model::operation::LogicalNotNode &) override;
+ void visit(const model::operation::LogicalOrNode &) override;
void visit(const model::operation::LogisticNode &) override;
void visit(const model::operation::LSTMNode &) override;
void visit(const model::operation::MulNode &) override;