author     Inki Dae <inki.dae@samsung.com>    2020-09-09 15:59:32 +0900
committer  Inki Dae <inki.dae@samsung.com>    2020-09-09 16:24:56 +0900
commit     0f13bac7ee630e6c4693a3d0554c312951151ff9 (patch)
tree       92206702922a79d60ff30e9944a5c1bacbbbfafa
parent     a5ede2282c1fb9efccb4cd5f4328576586267809 (diff)
This patch adds a patch set for hand gesture model support.

With this patch set, the hand gesture model from AIC works well.

What this patch set does:
- Add ArgMax op support
- Add ReduceSum op support
- Add ReverseV2 op support
- Add ReduceMax op support
- Add Division op support
- Fix wrong axis value for Pack op
- Skip ArgMax op for GpuAcc

This patch set will be upstreamed to ARMNN mainline.

Change-Id: I23967e3f1948cd07b11798389a4d362fbf346a15
Signed-off-by: Inki Dae <inki.dae@samsung.com>
-rw-r--r--  packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch    350
-rw-r--r--  packaging/0002-backends-reference-Add-ReduceSum-op-support.patch   869
-rw-r--r--  packaging/0003-backends-test-Add-ReduceSum-test-cases.patch         399
-rw-r--r--  packaging/0004-backends-reference-Add-ReverseV2-op-support.patch   912
-rw-r--r--  packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch      282
-rw-r--r--  packaging/0006-backends-reference-Add-ReduceMax-op-support.patch   892
-rw-r--r--  packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch      333
-rw-r--r--  packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch       98
-rw-r--r--  packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch          40
-rw-r--r--  packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch               39
-rw-r--r--  packaging/armnn.spec                                                  34
11 files changed, 4248 insertions, 0 deletions
diff --git a/packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch b/packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch
new file mode 100644
index 000000000..45aafa3b4
--- /dev/null
+++ b/packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch
@@ -0,0 +1,350 @@
+From c7fd8aa7013557d330e62baf24738c5853000170 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Fri, 4 Sep 2020 15:19:36 +0900
+Subject: [PATCH 01/10] backends/reference: Add ArgMinMax op support
+
+This patch adds ArgMinMax op support.
+
+ARMNN already defines an ArgMinMax op, but it is not wired up in the
+TensorFlow Lite parser and it does not support an int64 output tensor.
+
+So this patch adds a new tensor data type, Signed64 (int64), and an
+ArgMinMax computation function that writes int64 output.
+
+By default, the TensorFlow Lite parser reads int64 as the output tensor
+type of the ArgMinMax op, so the parser now selects the proper function -
+int64 or int32 - according to the parsed output_type value (see the
+standalone sketch after this patch).
+
+With this patch, ARMNN supports both output types - int64 and int32 -
+for the ArgMinMax op.
+
+Change-Id: Ife213835a5531b99f32dbf73a73909e108dde89c
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ include/armnn/Descriptors.hpp | 5 ++-
+ include/armnn/Types.hpp | 1 +
+ include/armnn/TypesUtils.hpp | 2 +
+ src/armnnTfLiteParser/TfLiteParser.cpp | 46 +++++++++++++++++++++-
+ src/armnnTfLiteParser/TfLiteParser.hpp | 1 +
+ src/backends/aclCommon/ArmComputeTensorUtils.cpp | 2 +
+ src/backends/backendsCommon/WorkloadData.cpp | 8 +---
+ src/backends/reference/test/ArgMinMaxTests.cpp | 12 +++---
+ src/backends/reference/workloads/ArgMinMax.cpp | 33 ++++++++++++++++
+ src/backends/reference/workloads/ArgMinMax.hpp | 3 ++
+ src/backends/reference/workloads/CMakeLists.txt | 2 -
+ .../reference/workloads/RefArgMinMaxWorkload.cpp | 13 ++++--
+ 12 files changed, 108 insertions(+), 20 deletions(-)
+
+diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
+index 13765e8..364e447 100644
+--- a/include/armnn/Descriptors.hpp
++++ b/include/armnn/Descriptors.hpp
+@@ -53,17 +53,20 @@ struct ArgMinMaxDescriptor
+ ArgMinMaxDescriptor()
+ : m_Function(ArgMinMaxFunction::Min)
+ , m_Axis(-1)
++ , m_Output_Type(armnn::DataType::Signed64)
+ {}
+
+ bool operator ==(const ArgMinMaxDescriptor &rhs) const
+ {
+- return m_Function == rhs.m_Function && m_Axis == rhs.m_Axis;
++ return m_Function == rhs.m_Function && m_Axis == rhs.m_Axis && m_Output_Type == rhs.m_Output_Type;
+ }
+
+ /// Specify if the function is to find Min or Max.
+ ArgMinMaxFunction m_Function;
+ /// Axis to reduce across the input tensor.
+ int m_Axis;
++ /// Output tensor data type: either Signed32 or Signed64. Default is Signed64.
++ armnn::DataType m_Output_Type;
+ };
+
+ /// A ComparisonDescriptor for the ComparisonLayer
+diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
+index e58cecf..390288f 100644
+--- a/include/armnn/Types.hpp
++++ b/include/armnn/Types.hpp
+@@ -41,6 +41,7 @@ enum class DataType
+ QSymmS8 = 7,
+ QAsymmS8 = 8,
+ BFloat16 = 9,
++ Signed64 = 10,
+
+ QuantisedAsymm8 ARMNN_DEPRECATED_ENUM_MSG("Use DataType::QAsymmU8 instead.") = QAsymmU8,
+ QuantisedSymm16 ARMNN_DEPRECATED_ENUM_MSG("Use DataType::QSymmS16 instead.") = QSymmS16
+diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
+index 5065152..decd04e 100644
+--- a/include/armnn/TypesUtils.hpp
++++ b/include/armnn/TypesUtils.hpp
+@@ -120,6 +120,7 @@ constexpr unsigned int GetDataTypeSize(DataType dataType)
+ case DataType::Float16: return 2U;
+ case DataType::Float32:
+ case DataType::Signed32: return 4U;
++ case DataType::Signed64: return 8U;
+ case DataType::QAsymmU8: return 1U;
+ case DataType::QAsymmS8: return 1U;
+ case DataType::QSymmS8: return 1U;
+@@ -171,6 +172,7 @@ constexpr const char* GetDataTypeName(DataType dataType)
+ {
+ case DataType::Float16: return "Float16";
+ case DataType::Float32: return "Float32";
++ case DataType::Signed64: return "Signed64";
+ case DataType::QAsymmU8: return "QAsymmU8";
+ case DataType::QAsymmS8: return "QAsymmS8";
+ case DataType::QSymmS8: return "QSymmS8";
+diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
+index 21b1bce..6c1a64b 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.cpp
++++ b/src/armnnTfLiteParser/TfLiteParser.cpp
+@@ -342,7 +342,9 @@ armnn::TensorInfo ToTensorInfo(TfLiteParser::TensorRawPtr tensorPtr, const std::
+ case tflite::TensorType_INT32:
+ type = armnn::DataType::Signed32;
+ break;
+-
++ case tflite::TensorType_INT64:
++ type = armnn::DataType::Signed64;
++ break;
+ default:
+ {
+ CheckLocation location = CHECK_LOCATION();
+@@ -529,6 +531,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o
+ m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE] = &TfLiteParser::ParseTranspose;
+ m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV] = &TfLiteParser::ParseTransposeConv;
+ m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack;
++ m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax;
+
+ // register supported custom operators
+ m_CustomParserFunctions["TFLite_Detection_PostProcess"] = &TfLiteParser::ParseDetectionPostProcess;
+@@ -2728,6 +2731,47 @@ void TfLiteParser::ParseSplitV(size_t subgraphIndex, size_t operatorIndex)
+ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
+ }
+
++void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex)
++{
++ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
++ const auto *options = operatorPtr->builtin_options.AsArgMaxOptions();
++
++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(inputs.size(), 2);
++
++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(outputs.size(), 1);
++
++ auto layerName = boost::str(boost::format("ArgMax:%1%:%2%") % subgraphIndex % operatorIndex);
++
++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]);
++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]);
++
++ // Get const axis value from model and set it to descriptor.
++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer);
++
++ ArgMinMaxDescriptor desc;
++ desc.m_Axis = axisBufferPtr->data.data()[0];
++ // Use Signed32 when the parsed output_type requests int32; otherwise default to Signed64.
++ desc.m_Output_Type = options->output_type == 3 ? armnn::DataType::Signed32 : armnn::DataType::Signed64;
++ desc.m_Function = ArgMinMaxFunction::Max;
++
++ // Register an ArgMax layer.
++ IConnectableLayer *layer = m_Network->AddArgMinMaxLayer(desc, layerName.c_str());
++
++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
++
++ // Register input tensor to the layer.
++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
++
++ // Register output tensor to the layer.
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
++}
++
+ armnn::IConnectableLayer* TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer,
+ unsigned int outputSlot,
+ tflite::ActivationFunctionType activationType)
+diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
+index c72f7ad..478edad 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.hpp
++++ b/src/armnnTfLiteParser/TfLiteParser.hpp
+@@ -133,6 +133,7 @@ private:
+ void ParseTranspose(size_t subgraphIndex, size_t operatorIndex);
+ void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex);
+ void ParseUnpack(size_t subgraphIndex, size_t operatorIndex);
++ void ParseArgMax(size_t subgraphIndex, size_t operatorIndex);
+
+ void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot);
+ void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot);
+diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+index 00ebc9c..d8b8bab 100644
+--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
++++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+@@ -31,6 +31,8 @@ arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multi
+ return arm_compute::DataType::QASYMM8;
+ case armnn::DataType::QSymmS16:
+ return arm_compute::DataType::QSYMM16;
++ case armnn::DataType::Signed64:
++ return arm_compute::DataType::S64;
+ case armnn::DataType::QSymmS8:
+ {
+ return multiScales ? arm_compute::DataType::QSYMM8_PER_CHANNEL : arm_compute::DataType::QSYMM8;
+diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
+index 8f751c4..69bebbc 100644
+--- a/src/backends/backendsCommon/WorkloadData.cpp
++++ b/src/backends/backendsCommon/WorkloadData.cpp
+@@ -623,11 +623,6 @@ void ArgMinMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
+ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+- if (outputTensorInfo.GetDataType() != DataType::Signed32)
+- {
+- throw InvalidArgumentException(descriptorName + ": Output of ArgMinMax layer must be Int32.");
+- }
+-
+ std::vector<DataType> supportedInputTypes =
+ {
+ DataType::BFloat16,
+@@ -636,7 +631,8 @@ void ArgMinMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+ DataType::QAsymmS8,
+ DataType::QAsymmU8,
+ DataType::QSymmS16,
+- DataType::Signed32
++ DataType::Signed32,
++ DataType::Signed64
+ };
+
+ ValidateDataTypes(inputTensorInfo, supportedInputTypes, descriptorName);
+diff --git a/src/backends/reference/test/ArgMinMaxTests.cpp b/src/backends/reference/test/ArgMinMaxTests.cpp
+index 201a2c0..dce15b2 100644
+--- a/src/backends/reference/test/ArgMinMaxTests.cpp
++++ b/src/backends/reference/test/ArgMinMaxTests.cpp
+@@ -12,11 +12,11 @@ BOOST_AUTO_TEST_SUITE(RefArgMinMax)
+ BOOST_AUTO_TEST_CASE(ArgMinTest)
+ {
+ const armnn::TensorInfo inputInfo({ 1, 2, 3 } , armnn::DataType::Float32);
+- const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Float32);
++ const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Signed64);
+
+ std::vector<float> inputValues({ 1.0f, 5.0f, 3.0f, 4.0f, 2.0f, 6.0f});
+- std::vector<int32_t> outputValues(outputInfo.GetNumElements());
+- std::vector<int32_t> expectedValues({ 0, 1, 0 });
++ std::vector<int64_t> outputValues(outputInfo.GetNumElements());
++ std::vector<int64_t> expectedValues({ 0, 1, 0 });
+
+ ArgMinMax(*armnn::MakeDecoder<float>(inputInfo, inputValues.data()),
+ outputValues.data(),
+@@ -35,11 +35,11 @@ BOOST_AUTO_TEST_CASE(ArgMinTest)
+ BOOST_AUTO_TEST_CASE(ArgMaxTest)
+ {
+ const armnn::TensorInfo inputInfo({ 1, 2, 3 } , armnn::DataType::Float32);
+- const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Float32);
++ const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Signed64);
+
+ std::vector<float> inputValues({ 1.0f, 5.0f, 3.0f, 4.0f, 2.0f, 6.0f });
+- std::vector<int32_t> outputValues(outputInfo.GetNumElements());
+- std::vector<int32_t> expectedValues({ 1, 0, 1 });
++ std::vector<int64_t> outputValues(outputInfo.GetNumElements());
++ std::vector<int64_t> expectedValues({ 1, 0, 1 });
+
+ ArgMinMax(*armnn::MakeDecoder<float>(inputInfo, inputValues.data()),
+ outputValues.data(),
+diff --git a/src/backends/reference/workloads/ArgMinMax.cpp b/src/backends/reference/workloads/ArgMinMax.cpp
+index 637aa17..308a869 100644
+--- a/src/backends/reference/workloads/ArgMinMax.cpp
++++ b/src/backends/reference/workloads/ArgMinMax.cpp
+@@ -12,6 +12,39 @@
+ namespace armnn
+ {
+
++void ArgMinMax(Decoder<float>& in, int64_t* out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis)
++{
++ IgnoreUnused(outputTensorInfo);
++
++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
++
++ const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
++ const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
++ const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
++ uAxis + 1,
++ inputTensorInfo.GetNumDimensions());
++
++ for (unsigned int outer = 0; outer < outerElements; ++outer) {
++ for (unsigned int inner = 0; inner < innerElements; ++inner) {
++ in[outer * axisSize * innerElements + inner];
++ auto tmpValue = in.Get();
++ unsigned int tmpIndex = 0;
++ for (unsigned int i = 1; i < axisSize; ++i) {
++ in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
++ const auto& value = in.Get();
++ if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) ||
++ (function == armnn::ArgMinMaxFunction::Max && value > tmpValue)) {
++ tmpValue = value;
++ tmpIndex = i;
++ }
++ }
++
++ out[outer * innerElements + inner] = boost::numeric_cast<int64_t>(tmpIndex);
++ }
++ }
++}
++
+ void ArgMinMax(Decoder<float>& in, int32_t* out, const TensorInfo& inputTensorInfo,
+ const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis)
+ {
+diff --git a/src/backends/reference/workloads/ArgMinMax.hpp b/src/backends/reference/workloads/ArgMinMax.hpp
+index 5a9c6a8..b4693ee 100644
+--- a/src/backends/reference/workloads/ArgMinMax.hpp
++++ b/src/backends/reference/workloads/ArgMinMax.hpp
+@@ -13,6 +13,9 @@
+ namespace armnn
+ {
+
++void ArgMinMax(Decoder<float>& in, int64_t* out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis);
++
+ void ArgMinMax(Decoder<float>& in, int32_t* out, const TensorInfo& inputTensorInfo,
+ const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis);
+
+diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
+index 94c8513..a8ddd1d 100644
+--- a/src/backends/reference/workloads/CMakeLists.txt
++++ b/src/backends/reference/workloads/CMakeLists.txt
+@@ -5,8 +5,6 @@
+
+ list(APPEND armnnRefBackendWorkloads_sources
+ Abs.hpp
+- ArgMinMax.cpp
+- ArgMinMax.hpp
+ Activation.cpp
+ Activation.hpp
+ ArgMinMax.cpp
+diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
+index 5f1eb73..b7246d5 100644
+--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
++++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp
+@@ -29,10 +29,15 @@ void RefArgMinMaxWorkload::Execute() const
+
+ const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+- int32_t* output = GetOutputTensorData<int32_t>(0, m_Data);
+-
+- ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function,
+- m_Data.m_Parameters.m_Axis);
++ if (m_Data.m_Parameters.m_Output_Type == armnn::DataType::Signed32) {
++ int32_t *output = GetOutputTensorData<int32_t>(0, m_Data);
++ ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function,
++ m_Data.m_Parameters.m_Axis);
++ } else {
++ int64_t *output = GetOutputTensorData<int64_t>(0, m_Data);
++ ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function,
++ m_Data.m_Parameters.m_Axis);
++ }
+ }
+
+ } //namespace armnn
+\ No newline at end of file
+--
+2.7.4
+
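The output_type dispatch described in patch 0001 above can be illustrated with a small standalone sketch. This is not ArmNN or TensorFlow Lite code; the names (ArgMaxAlongAxis, wantInt32) are hypothetical. It only mirrors the idea: compute an argmax over one axis of a row-major tensor and emit either int32 or int64 indices depending on the requested output type. The input and expected output match the ArgMaxTest case in the patch.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Argmax along `axis` of a row-major tensor with the given shape.
    // The index type is a template parameter so the caller can select
    // int32_t or int64_t output, analogous to the parsed output_type.
    template <typename IndexT>
    void ArgMaxAlongAxis(const std::vector<float>& in,
                         const std::vector<unsigned>& shape,
                         unsigned axis,
                         std::vector<IndexT>& out)
    {
        unsigned outer = 1, inner = 1;
        for (unsigned d = 0; d < axis; ++d)                 outer *= shape[d];
        for (unsigned d = axis + 1; d < shape.size(); ++d)  inner *= shape[d];
        const unsigned axisSize = shape[axis];

        out.assign(outer * inner, 0);
        for (unsigned o = 0; o < outer; ++o)
        {
            for (unsigned i = 0; i < inner; ++i)
            {
                float  best    = in[o * axisSize * inner + i];
                IndexT bestIdx = 0;
                for (unsigned a = 1; a < axisSize; ++a)
                {
                    const float v = in[(o * axisSize + a) * inner + i];
                    if (v > best) { best = v; bestIdx = static_cast<IndexT>(a); }
                }
                out[o * inner + i] = bestIdx;
            }
        }
    }

    int main()
    {
        // Shape { 1, 2, 3 }, reduced over axis 1 (same data as ArgMaxTest).
        const std::vector<float>    input = { 1.0f, 5.0f, 3.0f, 4.0f, 2.0f, 6.0f };
        const std::vector<unsigned> shape = { 1, 2, 3 };

        const bool wantInt32 = false;   // stands in for "output_type == int32"
        if (wantInt32)
        {
            std::vector<int32_t> out;
            ArgMaxAlongAxis(input, shape, 1, out);
            for (auto v : out) std::cout << v << ' ';   // prints: 1 0 1
        }
        else
        {
            std::vector<int64_t> out;                   // default: 64-bit indices
            ArgMaxAlongAxis(input, shape, 1, out);
            for (auto v : out) std::cout << v << ' ';   // prints: 1 0 1
        }
        std::cout << '\n';
        return 0;
    }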
diff --git a/packaging/0002-backends-reference-Add-ReduceSum-op-support.patch b/packaging/0002-backends-reference-Add-ReduceSum-op-support.patch
new file mode 100644
index 000000000..e567e5ace
--- /dev/null
+++ b/packaging/0002-backends-reference-Add-ReduceSum-op-support.patch
@@ -0,0 +1,869 @@
+From 0f6fba191b60093d9059e3ca7489cb24a508d8bc Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Mon, 7 Sep 2020 19:34:36 +0900
+Subject: [PATCH 02/10] backends/reference: Add ReduceSum op support
+
+Change-Id: I322010989be9dc94e48ef5e0e184b977f4cd2427
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ CMakeLists.txt | 2 +
+ include/armnn/Descriptors.hpp | 32 ++++++++++
+ include/armnn/DescriptorsFwd.hpp | 1 +
+ include/armnn/ILayerSupport.hpp | 5 ++
+ include/armnn/ILayerVisitor.hpp | 8 +++
+ include/armnn/INetwork.hpp | 7 +++
+ include/armnn/LayerVisitorBase.hpp | 4 ++
+ src/armnn/InternalTypes.hpp | 3 +-
+ src/armnn/LayersFwd.hpp | 2 +
+ src/armnn/Network.cpp | 6 ++
+ src/armnn/Network.hpp | 4 ++
+ src/armnn/layers/ReduceSumLayer.cpp | 70 ++++++++++++++++++++++
+ src/armnn/layers/ReduceSumLayer.hpp | 48 +++++++++++++++
+ src/armnnTfLiteParser/TfLiteParser.cpp | 45 ++++++++++++++
+ src/armnnTfLiteParser/TfLiteParser.hpp | 1 +
+ src/backends/backendsCommon/LayerSupportBase.cpp | 8 +++
+ src/backends/backendsCommon/LayerSupportBase.hpp | 4 ++
+ src/backends/backendsCommon/WorkloadData.cpp | 27 +++++++++
+ src/backends/backendsCommon/WorkloadData.hpp | 5 ++
+ src/backends/backendsCommon/WorkloadFactory.cpp | 18 ++++++
+ src/backends/backendsCommon/WorkloadFactory.hpp | 2 +
+ src/backends/reference/RefLayerSupport.cpp | 30 ++++++++++
+ src/backends/reference/RefLayerSupport.hpp | 4 ++
+ src/backends/reference/RefWorkloadFactory.cpp | 6 ++
+ src/backends/reference/RefWorkloadFactory.hpp | 4 ++
+ src/backends/reference/workloads/CMakeLists.txt | 4 ++
+ src/backends/reference/workloads/ReduceSum.cpp | 70 ++++++++++++++++++++++
+ src/backends/reference/workloads/ReduceSum.hpp | 20 +++++++
+ .../reference/workloads/RefReduceSumWorkload.cpp | 35 +++++++++++
+ .../reference/workloads/RefReduceSumWorkload.hpp | 21 +++++++
+ src/backends/reference/workloads/RefWorkloads.hpp | 1 +
+ 31 files changed, 496 insertions(+), 1 deletion(-)
+ create mode 100644 src/armnn/layers/ReduceSumLayer.cpp
+ create mode 100644 src/armnn/layers/ReduceSumLayer.hpp
+ create mode 100644 src/backends/reference/workloads/ReduceSum.cpp
+ create mode 100644 src/backends/reference/workloads/ReduceSum.hpp
+ create mode 100644 src/backends/reference/workloads/RefReduceSumWorkload.cpp
+ create mode 100644 src/backends/reference/workloads/RefReduceSumWorkload.hpp
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 5c125e9..962dc2d 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -380,6 +380,8 @@ list(APPEND armnn_sources
+ src/armnn/layers/TransposeConvolution2dLayer.hpp
+ src/armnn/layers/TransposeLayer.hpp
+ src/armnn/layers/TransposeLayer.cpp
++ src/armnn/layers/ReduceSumLayer.hpp
++ src/armnn/layers/ReduceSumLayer.cpp
+ src/armnn/BackendRegistry.cpp
+ src/armnn/BackendSettings.hpp
+ src/armnn/BackendHelper.cpp
+diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
+index 364e447..3651c20 100644
+--- a/include/armnn/Descriptors.hpp
++++ b/include/armnn/Descriptors.hpp
+@@ -1215,4 +1215,36 @@ struct TransposeDescriptor
+ PermutationVector m_DimMappings;
+ };
+
++/// A ReduceSumDescriptor for the ReduceSumLayer.
++struct ReduceSumDescriptor
++{
++ ReduceSumDescriptor()
++ : m_TargetWidth(0)
++ , m_TargetHeight(0)
++ , m_KeepDims(0)
++ , m_Axis(0)
++ , m_DataLayout(DataLayout::NCHW)
++ {}
++
++ bool operator ==(const ReduceSumDescriptor& rhs) const
++ {
++ return m_TargetWidth == rhs.m_TargetWidth &&
++ m_TargetHeight == rhs.m_TargetHeight &&
++ m_KeepDims == rhs.m_KeepDims &&
++ m_Axis == rhs.m_Axis &&
++ m_DataLayout == rhs.m_DataLayout;
++ }
++
++ /// Target width value.
++ uint32_t m_TargetWidth;
++ /// Target height value.
++ uint32_t m_TargetHeight;
++ /// If true, the reduced dimensions are retained in the output shape.
++ uint32_t m_KeepDims;
++ /// The indices of the dimensions to reduce.
++ int32_t m_Axis;
++ /// The data layout to be used (NCHW, NHWC).
++ DataLayout m_DataLayout;
++};
++
+ } // namespace armnn
+diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
+index f090372..cef85d5 100644
+--- a/include/armnn/DescriptorsFwd.hpp
++++ b/include/armnn/DescriptorsFwd.hpp
+@@ -33,6 +33,7 @@ struct QLstmDescriptor;
+ struct ReshapeDescriptor;
+ struct ResizeBilinearDescriptor;
+ struct ResizeDescriptor;
++struct ReduceSumDescriptor;
+ struct SoftmaxDescriptor;
+ struct SpaceToBatchNdDescriptor;
+ struct SpaceToDepthDescriptor;
+diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
+index 58509c9..0701790 100644
+--- a/include/armnn/ILayerSupport.hpp
++++ b/include/armnn/ILayerSupport.hpp
+@@ -392,6 +392,11 @@ public:
+ const TransposeDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
++ virtual bool IsReduceSumSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceSumDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
++
+ }; // class ILayerSupport
+
+ using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>;
+diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
+index 530e74f..cd57275 100644
+--- a/include/armnn/ILayerVisitor.hpp
++++ b/include/armnn/ILayerVisitor.hpp
+@@ -403,6 +403,14 @@ public:
+ const ResizeDescriptor& resizeDescriptor,
+ const char* name = nullptr) = 0;
+
++ /// Function that a reduce_sum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
++ /// @param layer - pointer to the layer which is calling back to this visit function.
++ /// @param ReduceSumDescriptor - Parameters for the reduce sum operation.
++ /// @param name - Optional name for the layer.
++ virtual void VisitReduceSumLayer(const IConnectableLayer* layer,
++ const ReduceSumDescriptor& reducesumDescriptor,
++ const char* name = nullptr) = 0;
++
+ /// Function a Reciprocal of square root layer should call back to when its Accept(ILayerVisitor&)
+ /// function is invoked.
+ /// @param layer - pointer to the layer which is calling back to this visit function.
+diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
+index b840dd5..79ad686 100644
+--- a/include/armnn/INetwork.hpp
++++ b/include/armnn/INetwork.hpp
+@@ -360,6 +360,13 @@ public:
+ virtual IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
+ const char* name = nullptr) = 0;
+
++ /// Adds a reduce sum layer to the network.
++ /// @param ReduceSumDescriptor - Parameters for the reduce sum operation.
++ /// @param name - Optional name for the layer.
++ /// @return - Interface for configuring the layer.
++ virtual IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducemaxDescriptor,
++ const char* name = nullptr) = 0;
++
+ /// Adds an instance normalization layer to the network.
+ /// @param desc - Parameters for the instance normalization operation.
+ /// @param name - Optional name for the layer.
+diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp
+index 95d6bd3..209ef2c 100644
+--- a/include/armnn/LayerVisitorBase.hpp
++++ b/include/armnn/LayerVisitorBase.hpp
+@@ -204,6 +204,10 @@ public:
+ const ResizeDescriptor&,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+
++ void VisitReduceSumLayer(const IConnectableLayer*,
++ const ReduceSumDescriptor&,
++ const char*) override { DefaultPolicy::Apply(__func__); }
++
+ void VisitRsqrtLayer(const IConnectableLayer*,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+
+diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
+index 455cb60..5f5ee01 100644
+--- a/src/armnn/InternalTypes.hpp
++++ b/src/armnn/InternalTypes.hpp
+@@ -71,7 +71,8 @@
+ X(Subtraction) \
+ X(Switch) \
+ X(Transpose) \
+- X(TransposeConvolution2d)
++ X(TransposeConvolution2d) \
++ X(ReduceSum)
+
+ /// When adding a new layer, adapt also the LastLayer enum value in the
+ /// enum class LayerType below
+diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
+index 2054413..5092828 100644
+--- a/src/armnn/LayersFwd.hpp
++++ b/src/armnn/LayersFwd.hpp
+@@ -54,6 +54,7 @@
+ #include "layers/QuantizedLstmLayer.hpp"
+ #include "layers/ReshapeLayer.hpp"
+ #include "layers/ResizeLayer.hpp"
++#include "layers/ReduceSumLayer.hpp"
+ #include "layers/SliceLayer.hpp"
+ #include "layers/SoftmaxLayer.hpp"
+ #include "layers/SpaceToBatchNdLayer.hpp"
+@@ -142,6 +143,7 @@ DECLARE_LAYER(QLstm)
+ DECLARE_LAYER(QuantizedLstm)
+ DECLARE_LAYER(Reshape)
+ DECLARE_LAYER(Resize)
++DECLARE_LAYER(ReduceSum)
+ DECLARE_LAYER(Slice)
+ DECLARE_LAYER(Softmax)
+ DECLARE_LAYER(SpaceToBatchNd)
+diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
+index a047297..335e104 100644
+--- a/src/armnn/Network.cpp
++++ b/src/armnn/Network.cpp
+@@ -1472,6 +1472,12 @@ resizeDescriptor, const char* name)
+ return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
+ }
+
++IConnectableLayer* Network::AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor,
++ const char* name)
++{
++ return m_Graph->AddLayer<ReduceSumLayer>(reducesumDescriptor, name);
++}
++
+ IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
+ const char* name)
+ {
+diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
+index df4a35f..6c767f3 100644
+--- a/src/armnn/Network.hpp
++++ b/src/armnn/Network.hpp
+@@ -160,6 +160,10 @@ public:
+ IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
+ const char* name = nullptr) override;
+
++ IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor,
++ const char* name = nullptr) override;
++
++
+ IConnectableLayer* AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc,
+ const char* name = nullptr) override;
+
+diff --git a/src/armnn/layers/ReduceSumLayer.cpp b/src/armnn/layers/ReduceSumLayer.cpp
+new file mode 100644
+index 0000000..198289c
+--- /dev/null
++++ b/src/armnn/layers/ReduceSumLayer.cpp
+@@ -0,0 +1,70 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReduceSumLayer.hpp"
++#include "LayerCloneBase.hpp"
++
++#include <armnn/TypesUtils.hpp>
++
++#include <armnnUtils/DataLayoutIndexed.hpp>
++
++#include <backendsCommon/WorkloadData.hpp>
++#include <backendsCommon/WorkloadFactory.hpp>
++
++using namespace armnnUtils;
++
++namespace armnn
++{
++
++ReduceSumLayer::ReduceSumLayer(const ReduceSumDescriptor& param, const char* name)
++ : LayerWithParameters(1, 1, LayerType::ReduceSum, param, name)
++{
++}
++
++std::unique_ptr<IWorkload> ReduceSumLayer::CreateWorkload(const IWorkloadFactory& factory) const
++{
++ ReduceSumQueueDescriptor descriptor;
++ return factory.CreateReduceSum(descriptor, PrepInfoAndDesc(descriptor));
++}
++
++ReduceSumLayer* ReduceSumLayer::Clone(Graph& graph) const
++{
++ return CloneBase<ReduceSumLayer>(graph, m_Param, GetName());
++}
++
++std::vector<TensorShape> ReduceSumLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
++{
++ ARMNN_ASSERT(inputShapes.size() == 1);
++
++ const TensorShape& inputShape = inputShapes[0];
++ const DataLayoutIndexed dimensionIndices = m_Param.m_DataLayout;
++
++ unsigned int outWidth = m_Param.m_TargetWidth;
++ unsigned int outHeight = m_Param.m_TargetHeight;
++ unsigned int outChannels = inputShape[dimensionIndices.GetChannelsIndex()];
++ unsigned int outBatch = inputShape[0];
++
++ TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ?
++ TensorShape( { outBatch, outHeight, outWidth, outChannels } ) :
++ TensorShape( { outBatch, outChannels, outHeight, outWidth });
++
++ return std::vector<TensorShape>({ tensorShape });
++}
++
++void ReduceSumLayer::ValidateTensorShapesFromInputs()
++{
++ VerifyLayerConnections(1, CHECK_LOCATION());
++
++ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
++
++ ARMNN_ASSERT(inferredShapes.size() == 1);
++}
++
++void ReduceSumLayer::Accept(ILayerVisitor& visitor) const
++{
++ visitor.VisitReduceSumLayer(this, GetParameters(), GetName());
++}
++
++} // namespace armnn
+diff --git a/src/armnn/layers/ReduceSumLayer.hpp b/src/armnn/layers/ReduceSumLayer.hpp
+new file mode 100644
+index 0000000..e8fe998
+--- /dev/null
++++ b/src/armnn/layers/ReduceSumLayer.hpp
+@@ -0,0 +1,48 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++#pragma once
++
++#include "LayerWithParameters.hpp"
++
++namespace armnn
++{
++
++/// This layer represents a reduce sum operation.
++class ReduceSumLayer : public LayerWithParameters<ReduceSumDescriptor>
++{
++public:
++ /// Makes a workload for the ReduceSum type.
++ /// @param [in] graph The graph where this layer can be found.
++ /// @param [in] factory The workload factory which will create the workload.
++ /// @return A pointer to the created workload, or nullptr if not created.
++ virtual std::unique_ptr<IWorkload>CreateWorkload(const IWorkloadFactory& factory) const override;
++
++ /// Creates a dynamically-allocated copy of this layer.
++ /// @param [in] graph The graph into which this layer is being cloned.
++ ReduceSumLayer* Clone(Graph& graph) const override;
++
++ /// Check if the input tensor shape(s)
++ /// will lead to a valid configuration of @ref ReduceSumLayer.
++ void ValidateTensorShapesFromInputs() override;
++
++ /// By default returns inputShapes if the number of inputs are equal to number of outputs,
++ /// otherwise infers the output shapes from given input shapes and layer properties.
++ /// @param [in] inputShapes The input shapes layer has.
++ /// @return A vector to the inferred output shape.
++ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
++
++ void Accept(ILayerVisitor& visitor) const override;
++
++protected:
++ /// Constructor to create a ReduceSumLayer.
++ /// @param [in] param ReduceSumDescriptor to configure the reduce sum operation.
++ /// @param [in] name Optional name for the layer.
++ ReduceSumLayer(const ReduceSumDescriptor& param, const char* name);
++
++ /// Default destructor
++ ~ReduceSumLayer() = default;
++};
++
++} // namespace armnn
+diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
+index 6c1a64b..e5400dc 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.cpp
++++ b/src/armnnTfLiteParser/TfLiteParser.cpp
+@@ -532,6 +532,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o
+ m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV] = &TfLiteParser::ParseTransposeConv;
+ m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack;
+ m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax;
++ m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum;
+
+ // register supported custom operators
+ m_CustomParserFunctions["TFLite_Detection_PostProcess"] = &TfLiteParser::ParseDetectionPostProcess;
+@@ -2772,6 +2773,50 @@ void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex)
+ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
+ }
+
++void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex)
++{
++ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
++ const auto *options = operatorPtr->builtin_options.AsReducerOptions();
++
++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(inputs.size(), 2);
++
++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(outputs.size(), 1);
++
++ auto layerName = boost::str(boost::format("ReduceSum:%1%:%2%") % subgraphIndex % operatorIndex);
++
++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]);
++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]);
++
++ TensorShape shape = sizeTensorInfo0.GetShape();
++
++ // Get const axis value from model and set it to descriptor.
++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer);
++
++ ReduceSumDescriptor desc;
++ desc.m_KeepDims = options->keep_dims;
++ desc.m_Axis = axisBufferPtr->data.data()[4];
++ desc.m_TargetHeight = shape[1];
++ desc.m_TargetWidth = shape[2];
++ desc.m_DataLayout = armnn::DataLayout::NHWC;
++
++ // Register a new ReduceSum layer with the in-memory ARMNN network.
++ IConnectableLayer *layer = m_Network->AddReduceSumLayer(desc, layerName.c_str());
++
++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
++
++ // Register input tensor to the layer.
++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
++
++ // Register output tensor to the layer.
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
++}
++
+ armnn::IConnectableLayer* TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer,
+ unsigned int outputSlot,
+ tflite::ActivationFunctionType activationType)
+diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
+index 478edad..13d1cb4 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.hpp
++++ b/src/armnnTfLiteParser/TfLiteParser.hpp
+@@ -134,6 +134,7 @@ private:
+ void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex);
+ void ParseUnpack(size_t subgraphIndex, size_t operatorIndex);
+ void ParseArgMax(size_t subgraphIndex, size_t operatorIndex);
++ void ParseSum(size_t subgraphIndex, size_t operatorIndex);
+
+ void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot);
+ void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot);
+diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
+index c55f51d..245b165 100644
+--- a/src/backends/backendsCommon/LayerSupportBase.cpp
++++ b/src/backends/backendsCommon/LayerSupportBase.cpp
+@@ -615,4 +615,12 @@ bool LayerSupportBase::IsTransposeSupported(const TensorInfo& /*input*/,
+ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+ }
+
++bool LayerSupportBase::IsReduceSumSupported(const TensorInfo& /*input*/,
++ const TensorInfo& /*output*/,
++ const ReduceSumDescriptor& /*descriptor*/,
++ Optional<std::string&> reasonIfUnsupported) const
++{
++ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
++}
++
+ } // namespace armnn
+diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
+index fcc3326..9b39f8f 100644
+--- a/src/backends/backendsCommon/LayerSupportBase.hpp
++++ b/src/backends/backendsCommon/LayerSupportBase.hpp
+@@ -377,6 +377,10 @@ public:
+ const TransposeDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
++ bool IsReduceSumSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceSumDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ };
+
+ } // namespace armnn
+diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
+index 69bebbc..7455ab5 100644
+--- a/src/backends/backendsCommon/WorkloadData.cpp
++++ b/src/backends/backendsCommon/WorkloadData.cpp
+@@ -3483,4 +3483,31 @@ void ElementwiseUnaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo)
+ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
+ }
+
++void ReduceSumQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
++{
++ const std::string descriptorName{"ReduceSumQueueDescriptor"};
++
++ ValidateNumInputs(workloadInfo, descriptorName, 1);
++ ValidateNumOutputs(workloadInfo, descriptorName, 1);
++
++ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
++ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
++
++ ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input");
++
++ std::vector<DataType> supportedTypes =
++ {
++ DataType::BFloat16,
++ DataType::Float16,
++ DataType::Float32,
++ DataType::QAsymmS8,
++ DataType::QAsymmU8,
++ DataType::QSymmS16,
++ DataType::Signed32
++ };
++
++ ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName);
++ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
++}
++
+ } // namespace armnn
+diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
+index 9239f4a..6f203b5 100644
+--- a/src/backends/backendsCommon/WorkloadData.hpp
++++ b/src/backends/backendsCommon/WorkloadData.hpp
+@@ -634,4 +634,9 @@ struct ElementwiseUnaryQueueDescriptor : QueueDescriptorWithParameters<Elementwi
+ void Validate(const WorkloadInfo& workloadInfo) const;
+ };
+
++struct ReduceSumQueueDescriptor : QueueDescriptorWithParameters<ReduceSumDescriptor>
++{
++ void Validate(const WorkloadInfo& workloadInfo) const;
++};
++
+ } // namespace armnn
+diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
+index 34bfd7c..b7195f7 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.cpp
++++ b/src/backends/backendsCommon/WorkloadFactory.cpp
+@@ -1178,6 +1178,18 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+
+ break;
+ }
++ case LayerType::ReduceSum:
++ {
++ auto cLayer = PolymorphicDowncast<const ReduceSumLayer*>(&layer);
++ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
++ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
++
++ result = layerSupportObject->IsReduceSumSupported(OverrideDataType(input, dataType),
++ OverrideDataType(output, dataType),
++ cLayer->GetParameters(),
++ reason);
++ break;
++ }
+ default:
+ {
+ ARMNN_ASSERT_MSG(false, "WorkloadFactory did not recognise type of layer.");
+@@ -1584,4 +1596,10 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateTransposeConvolution2d(
+ return std::unique_ptr<IWorkload>();
+ }
+
++std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& /*descriptor*/,
++ const WorkloadInfo& /*info*/) const
++{
++ return std::unique_ptr<IWorkload>();
++}
++
+ } // namepsace armnn
+diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
+index 98a6c36..0d98c92 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.hpp
++++ b/src/backends/backendsCommon/WorkloadFactory.hpp
+@@ -250,6 +250,8 @@ public:
+ virtual std::unique_ptr<IWorkload> CreateTransposeConvolution2d(
+ const TransposeConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
++ virtual std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
++ const WorkloadInfo& info) const;
+ };
+
+ } // namespace armnn
+diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
+index 034cd12..333ad4d 100644
+--- a/src/backends/reference/RefLayerSupport.cpp
++++ b/src/backends/reference/RefLayerSupport.cpp
+@@ -2132,4 +2132,34 @@ bool RefLayerSupport::IsTransposeSupported(const TensorInfo& input,
+ return supported;
+ }
+
++bool RefLayerSupport::IsReduceSumSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceSumDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported) const
++{
++ IgnoreUnused(descriptor);
++ bool supported = true;
++ std::array<DataType,7> supportedTypes =
++ {
++ DataType::BFloat16,
++ DataType::Float32,
++ DataType::Float16,
++ DataType::QAsymmS8,
++ DataType::QAsymmU8,
++ DataType::QSymmS16,
++ DataType::Signed32
++ };
++
++ supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
++ "Reference ReduceSum: input type not supported");
++
++ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
++ "Reference ReduceSum: output type not supported");
++
++ supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
++ "Reference ReduceSum: input and output types not matching");
++
++ return supported;
++}
++
+ } // namespace armnn
+diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
+index eb89946..766ddfa 100644
+--- a/src/backends/reference/RefLayerSupport.hpp
++++ b/src/backends/reference/RefLayerSupport.hpp
+@@ -342,6 +342,10 @@ public:
+ const TransposeDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
++ bool IsReduceSumSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceSumDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ };
+
+ } // namespace armnn
+diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
+index 5ce997c..9602df5 100644
+--- a/src/backends/reference/RefWorkloadFactory.cpp
++++ b/src/backends/reference/RefWorkloadFactory.cpp
+@@ -632,4 +632,10 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
+ return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info);
+ }
+
++std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
++ const WorkloadInfo& info) const
++{
++ return std::make_unique<RefReduceSumWorkload>(descriptor, info);
++}
++
+ } // namespace armnn
+diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
+index 1c607c0..93cab9a 100644
+--- a/src/backends/reference/RefWorkloadFactory.hpp
++++ b/src/backends/reference/RefWorkloadFactory.hpp
+@@ -250,6 +250,10 @@ public:
+ std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
++ std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
++ const WorkloadInfo& info) const override;
++
++
+ private:
+ template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
+ std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const;
+diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
+index a8ddd1d..d5eceff 100644
+--- a/src/backends/reference/workloads/CMakeLists.txt
++++ b/src/backends/reference/workloads/CMakeLists.txt
+@@ -49,6 +49,8 @@ list(APPEND armnnRefBackendWorkloads_sources
+ Minimum.hpp
+ Pad.cpp
+ Pad.hpp
++ ReduceSum.cpp
++ ReduceSum.hpp
+ Pooling2d.cpp
+ Pooling2d.hpp
+ PreluImpl.cpp
+@@ -172,6 +174,8 @@ list(APPEND armnnRefBackendWorkloads_sources
+ TensorBufferArrayView.hpp
+ TransposeConvolution2d.cpp
+ TransposeConvolution2d.hpp
++ RefReduceSumWorkload.cpp
++ RefReduceSumWorkload.hpp
+ )
+
+ add_library(armnnRefBackendWorkloads OBJECT ${armnnRefBackendWorkloads_sources})
+diff --git a/src/backends/reference/workloads/ReduceSum.cpp b/src/backends/reference/workloads/ReduceSum.cpp
+new file mode 100644
+index 0000000..475e163
+--- /dev/null
++++ b/src/backends/reference/workloads/ReduceSum.cpp
+@@ -0,0 +1,70 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReduceSum.hpp"
++
++#include <armnnUtils/TensorUtils.hpp>
++
++#include <boost/numeric/conversion/cast.hpp>
++#include <iostream>
++
++namespace armnn
++{
++
++void ReduceSum(Decoder<float>& in, float *out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, int32_t axis)
++{
++ IgnoreUnused(outputTensorInfo);
++
++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
++ const unsigned int batchSize = inputTensorInfo.GetShape()[0];
++ const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
++ const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
++ const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
++ uAxis + 1,
++ inputTensorInfo.GetNumDimensions());
++
++ // Workaround path, taken only when all of the following hold:
++ // - batch size > 0,
++ // - axis == 2, and
++ // - the input tensor has 4 dimensions.
++ if (batchSize > 0 && uAxis == 2 && inputTensorInfo.GetShape().GetNumDimensions() == 4) {
++ unsigned int height = inputTensorInfo.GetShape()[1];
++ unsigned int width = inputTensorInfo.GetShape()[2];
++ unsigned int channel = inputTensorInfo.GetShape()[3];
++ float sumValue = 0.0f;
++
++ for (unsigned int b = 0; b < batchSize; ++b) {
++ for (unsigned int c = 0; c < channel; ++c) {
++ for (unsigned int y = 0; y < height; ++y) {
++ for (unsigned int x = 0; x < width; ++x) {
++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c];
++ sumValue += in.Get();
++ }
++ }
++
++ out[b * channel + c] = sumValue;
++ sumValue = 0.0f;
++ }
++ }
++
++ return;
++ }
++
++ for (unsigned int outer = 0; outer < outerElements; ++outer) {
++ for (unsigned int inner = 0; inner < innerElements; ++inner) {
++ in[outer * axisSize * innerElements + inner];
++ auto tmpValue = in.Get();
++ for (unsigned int i = 1; i < axisSize; ++i) {
++ in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
++ const auto& value = in.Get();
++ tmpValue += value;
++ }
++ out[outer * innerElements + inner] = tmpValue;
++ }
++ }
++}
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/ReduceSum.hpp b/src/backends/reference/workloads/ReduceSum.hpp
+new file mode 100644
+index 0000000..cfaf347
+--- /dev/null
++++ b/src/backends/reference/workloads/ReduceSum.hpp
+@@ -0,0 +1,20 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include "armnn/Tensor.hpp"
++#include "armnn/Descriptors.hpp"
++
++#include "Decoders.hpp"
++
++namespace armnn
++{
++
++void ReduceSum(Decoder<float>& in, float* out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, int32_t axis);
++
++} //namespace armnn
++
+diff --git a/src/backends/reference/workloads/RefReduceSumWorkload.cpp b/src/backends/reference/workloads/RefReduceSumWorkload.cpp
+new file mode 100644
+index 0000000..4ea91a6
+--- /dev/null
++++ b/src/backends/reference/workloads/RefReduceSumWorkload.cpp
+@@ -0,0 +1,35 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "RefReduceSumWorkload.hpp"
++
++#include "ReduceSum.hpp"
++#include "RefWorkloadUtils.hpp"
++#include "BaseIterator.hpp"
++#include "Profiling.hpp"
++
++#include "BaseIterator.hpp"
++#include "Decoders.hpp"
++#include "Encoders.hpp"
++
++namespace armnn
++{
++
++void RefReduceSumWorkload::Execute() const
++{
++ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceSumWorkload_Execute");
++
++ const TensorInfo& inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
++ const TensorInfo& outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
++
++ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map());
++ Decoder<float> &decoder = *decoderPtr;
++
++ float *output = GetOutputTensorData<float>(0, m_Data);
++
++ ReduceSum(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Axis);
++}
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/RefReduceSumWorkload.hpp b/src/backends/reference/workloads/RefReduceSumWorkload.hpp
+new file mode 100644
+index 0000000..102b9f3
+--- /dev/null
++++ b/src/backends/reference/workloads/RefReduceSumWorkload.hpp
+@@ -0,0 +1,21 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include <backendsCommon/Workload.hpp>
++#include <backendsCommon/WorkloadData.hpp>
++
++namespace armnn
++{
++
++class RefReduceSumWorkload : public BaseWorkload<ReduceSumQueueDescriptor>
++{
++public:
++ using BaseWorkload<ReduceSumQueueDescriptor>::BaseWorkload;
++ virtual void Execute() const override;
++};
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
+index e396a6b..c80ed43 100644
+--- a/src/backends/reference/workloads/RefWorkloads.hpp
++++ b/src/backends/reference/workloads/RefWorkloads.hpp
+@@ -67,3 +67,4 @@
+ #include "Softmax.hpp"
+ #include "Splitter.hpp"
+ #include "TensorBufferArrayView.hpp"
++#include "RefReduceSumWorkload.hpp"
+--
+2.7.4
+
diff --git a/packaging/0003-backends-test-Add-ReduceSum-test-cases.patch b/packaging/0003-backends-test-Add-ReduceSum-test-cases.patch
new file mode 100644
index 000000000..989be5b57
--- /dev/null
+++ b/packaging/0003-backends-test-Add-ReduceSum-test-cases.patch
@@ -0,0 +1,399 @@
+From 7a6c7409021a64749b8792ea069d81463c5ee98c Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Mon, 7 Sep 2020 20:17:38 +0900
+Subject: [PATCH 03/10] backends/test: Add ReduceSum test cases
+
+Change-Id: Ic6d02e0e51908958cd1499f0d0f22146763574ee
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ src/backends/backendsCommon/test/CMakeLists.txt | 2 +
+ src/backends/backendsCommon/test/LayerTests.hpp | 1 +
+ .../test/layerTests/ReduceSumTestImpl.cpp | 293 +++++++++++++++++++++
+ .../test/layerTests/ReduceSumTestImpl.hpp | 33 +++
+ src/backends/reference/test/RefLayerTests.cpp | 7 +
+ 5 files changed, 336 insertions(+)
+ create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
+ create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
+
+diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
+index dd96d36..951a46d 100644
+--- a/src/backends/backendsCommon/test/CMakeLists.txt
++++ b/src/backends/backendsCommon/test/CMakeLists.txt
+@@ -127,6 +127,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
+ layerTests/PreluTestImpl.hpp
+ layerTests/QuantizeTestImpl.cpp
+ layerTests/QuantizeTestImpl.hpp
++ layerTests/ReduceSumTestImpl.cpp
++ layerTests/ReduceSumTestImpl.hpp
+ layerTests/ReshapeTestImpl.cpp
+ layerTests/ReshapeTestImpl.hpp
+ layerTests/ResizeTestImpl.cpp
+diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
+index 247ed12..25f411f 100644
+--- a/src/backends/backendsCommon/test/LayerTests.hpp
++++ b/src/backends/backendsCommon/test/LayerTests.hpp
+@@ -44,6 +44,7 @@
+ #include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
++#include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp>
+diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
+new file mode 100644
+index 0000000..4d698df
+--- /dev/null
++++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
+@@ -0,0 +1,293 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReduceSumTestImpl.hpp"
++
++#include <backendsCommon/test/DataTypeUtils.hpp>
++#include <backendsCommon/test/TensorCopyUtils.hpp>
++#include <backendsCommon/test/WorkloadTestUtils.hpp>
++
++#include <test/TensorHelpers.hpp>
++
++namespace
++{
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceSumTestCommon(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
++ const armnn::TensorInfo inputTensorInfo,
++ const armnn::TensorInfo outputTensorInfo,
++ const std::vector<float>& inputData,
++ const std::vector<float>& outputData,
++ int axis = 3)
++{
++ IgnoreUnused(memoryManager);
++ auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
++
++ LayerTestResult<float, 4> result(outputTensorInfo);
++ result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
++
++ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
++ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
++
++ armnn::ReduceSumQueueDescriptor descriptor;
++ unsigned int updated_idx = static_cast<uint32_t>(axis);
++ if (axis < 0) {
++ updated_idx = static_cast<uint32_t>(static_cast<int32_t>(inputTensorInfo.GetNumDimensions()) + axis);
++ }
++
++ descriptor.m_Parameters.m_Axis = updated_idx;
++ descriptor.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW;
++ armnn::WorkloadInfo info;
++
++ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
++ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
++
++ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduceSum(descriptor, info);
++
++ inputHandle->Allocate();
++ outputHandle->Allocate();
++
++ CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin());
++
++ workload->Execute();
++
++ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
++
++ return result;
++}
++
++} // namespace
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceSumSimpleTest(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 1, 1, 5 };
++ const armnn::TensorShape outputShape{ 1, 1, 1, 1};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f });
++ std::vector<float> outputValues({ 34.0f });
++
++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, -1);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceSumMultiChannel_1Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
++ const armnn::TensorShape outputShape{ 1, 1, 2, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++ std::vector<float> outputValues({ 111.0f, 222.0f, 333.0f, 444.0f,
++ 555.0f, 666.0f, 777.0f, 888.0f });
++
++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 1);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceSumMultiChannel_2Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
++ const armnn::TensorShape outputShape{ 1, 1, 1, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++ std::vector<float> outputValues({ 666.0f, 888.0f, 1110.0f, 1332.0f });
++
++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 2);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannelTest(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 3, 2, 3, 4 };
++ const armnn::TensorShape outputShape{ 3, 1, 1, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues( {7, 8, 6, 1,
++ 1, 1, 8, 7,
++ 3, 7, 7, 7,
++
++ 6, 8, 4, 7,
++ 3, 8, 7, 3,
++ 5, 8, 8, 8,
++
++
++ 7, 8, 2, 7,
++ 3, 8, 5, 6,
++ 8, 4, 2, 7,
++
++ 1, 6, 7, 2,
++ 8, 3, 3, 1,
++ 7, 6, 2, 6,
++
++
++ 5, 3, 4, 8,
++ 7, 8, 2, 4,
++ 6, 6, 2, 8,
++
++ 2, 2, 7, 2,
++ 5, 3, 6, 3,
++ 6, 1, 8, 8});
++ std::vector<float> outputValues({ 25.0f, 40.0f, 40.0f, 33.0f,
++ 34.0f, 35.0f, 21.0f, 29.0f,
++ 31.0f, 23.0f, 29.0f, 33.0f});
++
++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 2);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannel_2Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 3, 2, 3, 4 };
++ const armnn::TensorShape outputShape{ 3, 2, 3, 1};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues( {7, 8, 6, 1,
++ 1, 1, 8, 7,
++ 3, 7, 7, 7,
++
++ 6, 8, 4, 7,
++ 3, 8, 7, 3,
++ 5, 8, 8, 8,
++
++
++ 7, 8, 2, 7,
++ 3, 8, 5, 6,
++ 8, 4, 2, 7,
++
++ 1, 6, 7, 2,
++ 8, 3, 3, 1,
++ 7, 6, 2, 6,
++
++
++ 5, 3, 4, 8,
++ 7, 8, 2, 4,
++ 6, 6, 2, 8,
++
++ 2, 2, 7, 2,
++ 5, 3, 6, 3,
++ 6, 1, 8, 8});
++ std::vector<float> outputValues({ 22.0f, 17.0f, 24.0f,
++ 25.0f, 21.0f, 29.0f,
++
++ 24.0f, 22.0f, 21.0f,
++ 16.0f, 15.0f, 21.0f,
++
++ 20.0f, 21.0f, 22.0f,
++ 13.0f, 17.0f, 23.0f});
++
++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 3);
++}
++
++
++// Explicit template specializations
++
++template LayerTestResult<float, 4>
++ReduceSumSimpleTest<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceSumMultiChannel_1Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceSumMultiChannel_2Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceSumMultiBatchAndChannelTest<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceSumMultiBatchAndChannel_2Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
+new file mode 100644
+index 0000000..01d1a44
+--- /dev/null
++++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp
+@@ -0,0 +1,33 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include "LayerTestResult.hpp"
++
++#include <ResolveType.hpp>
++
++#include <armnn/backends/IBackendInternal.hpp>
++#include <backendsCommon/WorkloadFactory.hpp>
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceSumSimpleTest(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceSumMultiChannel_1Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceSumMultiChannel_2Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannelTest(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannel_2Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
+index d8dab3d..9461e2a 100644
+--- a/src/backends/reference/test/RefLayerTests.cpp
++++ b/src/backends/reference/test/RefLayerTests.cpp
+@@ -1980,4 +1980,11 @@ ARMNN_AUTO_TEST_CASE(Neg3dQuantisedAsymm8, Neg3dTest<DataType::QAsymmU8>)
+ ARMNN_AUTO_TEST_CASE(Neg2dQuantisedSymm16, Neg2dTest<DataType::QSymmS16>)
+ ARMNN_AUTO_TEST_CASE(Neg3dQuantisedSymm16, Neg3dTest<DataType::QSymmS16>)
+
++// ReduceSum
++ARMNN_AUTO_TEST_CASE(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannelFloat32, ReduceSumMultiChannel_1Test<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannel2Float32, ReduceSumMultiChannel_2Test<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannelFloat32, ReduceSumMultiBatchAndChannelTest<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannel_2Float32, ReduceSumMultiBatchAndChannel_2Test<DataType::Float32>)
++
+ BOOST_AUTO_TEST_SUITE_END()
+--
+2.7.4
+
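A quick way to sanity-check the expected outputs in the ReduceSum tests above is a standalone reduction over one axis of a flat, row-major 4D tensor. The sketch below is illustrative only: it is not ArmNN code, and the helper names (ReduceSumNaive, index) are assumptions made purely for this example.

    #include <array>
    #include <cassert>
    #include <vector>

    // Sum a row-major 4D tensor along 'axis'; the reduced dimension is kept with length 1.
    std::vector<float> ReduceSumNaive(const std::vector<float>& in,
                                      std::array<unsigned, 4> shape, unsigned axis)
    {
        std::array<unsigned, 4> outShape = shape;
        outShape[axis] = 1;
        std::vector<float> out(outShape[0] * outShape[1] * outShape[2] * outShape[3], 0.0f);

        auto index = [](const std::array<unsigned, 4>& s,
                        unsigned n, unsigned c, unsigned h, unsigned w)
        { return ((n * s[1] + c) * s[2] + h) * s[3] + w; };

        for (unsigned n = 0; n < shape[0]; ++n)
            for (unsigned c = 0; c < shape[1]; ++c)
                for (unsigned h = 0; h < shape[2]; ++h)
                    for (unsigned w = 0; w < shape[3]; ++w)
                    {
                        std::array<unsigned, 4> o{n, c, h, w};
                        o[axis] = 0; // every element along 'axis' accumulates into slot 0
                        out[index(outShape, o[0], o[1], o[2], o[3])] += in[index(shape, n, c, h, w)];
                    }
        return out;
    }

    int main()
    {
        // Mirrors ReduceSumSimpleTest: shape {1,1,1,5}, axis 3 -> a single value, 34.
        std::vector<float> in{5.0f, 2.0f, 8.0f, 10.0f, 9.0f};
        auto out = ReduceSumNaive(in, {1, 1, 1, 5}, 3);
        assert(out.size() == 1 && out[0] == 34.0f);
    }
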
diff --git a/packaging/0004-backends-reference-Add-ReverseV2-op-support.patch b/packaging/0004-backends-reference-Add-ReverseV2-op-support.patch
new file mode 100644
index 000000000..351923b31
--- /dev/null
+++ b/packaging/0004-backends-reference-Add-ReverseV2-op-support.patch
@@ -0,0 +1,912 @@
+From 8b90c253ef5bf33537a40d73492c6fe320d03546 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Tue, 8 Sep 2020 10:31:11 +0900
+Subject: [PATCH 04/10] backends/reference: Add ReverseV2 op support
+
+Change-Id: I0cb1a6fe670e5ff5f9b21b62ff03d3579b956ef7
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ CMakeLists.txt | 2 +
+ include/armnn/Descriptors.hpp | 28 ++++++++
+ include/armnn/DescriptorsFwd.hpp | 1 +
+ include/armnn/ILayerSupport.hpp | 5 ++
+ include/armnn/ILayerVisitor.hpp | 8 +++
+ include/armnn/INetwork.hpp | 7 ++
+ include/armnn/LayerVisitorBase.hpp | 4 ++
+ src/armnn/InternalTypes.hpp | 1 +
+ src/armnn/LayersFwd.hpp | 2 +
+ src/armnn/Network.cpp | 6 ++
+ src/armnn/Network.hpp | 3 +
+ src/armnn/layers/ReverseV2Layer.cpp | 75 ++++++++++++++++++++
+ src/armnn/layers/ReverseV2Layer.hpp | 48 +++++++++++++
+ src/armnnTfLiteParser/TfLiteParser.cpp | 41 +++++++++++
+ src/armnnTfLiteParser/TfLiteParser.hpp | 1 +
+ src/backends/backendsCommon/LayerSupportBase.cpp | 8 +++
+ src/backends/backendsCommon/LayerSupportBase.hpp | 5 ++
+ src/backends/backendsCommon/WorkloadData.cpp | 49 +++++++++++++
+ src/backends/backendsCommon/WorkloadData.hpp | 5 ++
+ src/backends/backendsCommon/WorkloadFactory.cpp | 18 +++++
+ src/backends/backendsCommon/WorkloadFactory.hpp | 4 ++
+ src/backends/reference/RefLayerSupport.cpp | 29 ++++++++
+ src/backends/reference/RefLayerSupport.hpp | 5 ++
+ src/backends/reference/RefWorkloadFactory.cpp | 6 ++
+ src/backends/reference/RefWorkloadFactory.hpp | 3 +
+ src/backends/reference/workloads/CMakeLists.txt | 4 ++
+ .../reference/workloads/RefReverseV2Workload.cpp | 35 ++++++++++
+ .../reference/workloads/RefReverseV2Workload.hpp | 21 ++++++
+ src/backends/reference/workloads/RefWorkloads.hpp | 1 +
+ src/backends/reference/workloads/ReverseV2.cpp | 80 ++++++++++++++++++++++
+ src/backends/reference/workloads/ReverseV2.hpp | 20 ++++++
+ 31 files changed, 525 insertions(+)
+ create mode 100644 src/armnn/layers/ReverseV2Layer.cpp
+ create mode 100644 src/armnn/layers/ReverseV2Layer.hpp
+ create mode 100644 src/backends/reference/workloads/RefReverseV2Workload.cpp
+ create mode 100644 src/backends/reference/workloads/RefReverseV2Workload.hpp
+ create mode 100644 src/backends/reference/workloads/ReverseV2.cpp
+ create mode 100644 src/backends/reference/workloads/ReverseV2.hpp
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 962dc2d..52c8785 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -380,6 +380,8 @@ list(APPEND armnn_sources
+ src/armnn/layers/TransposeConvolution2dLayer.hpp
+ src/armnn/layers/TransposeLayer.hpp
+ src/armnn/layers/TransposeLayer.cpp
++ src/armnn/layers/ReverseV2Layer.hpp
++ src/armnn/layers/ReverseV2Layer.cpp
+ src/armnn/layers/ReduceSumLayer.hpp
+ src/armnn/layers/ReduceSumLayer.cpp
+ src/armnn/BackendRegistry.cpp
+diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
+index 3651c20..c7123f1 100644
+--- a/include/armnn/Descriptors.hpp
++++ b/include/armnn/Descriptors.hpp
+@@ -1215,6 +1215,34 @@ struct TransposeDescriptor
+ PermutationVector m_DimMappings;
+ };
+
++/// A ReverseV2Descriptor for the ReverseV2 operation.
++struct ReverseV2Descriptor
++{
++ ReverseV2Descriptor()
++ : m_TargetWidth(0)
++ , m_TargetHeight(0)
++ , m_Axis(0)
++ , m_DataLayout(DataLayout::NCHW)
++ {}
++
++ bool operator ==(const ReverseV2Descriptor& rhs) const
++ {
++ return m_TargetWidth == rhs.m_TargetWidth &&
++ m_TargetHeight == rhs.m_TargetHeight &&
++ m_Axis == rhs.m_Axis &&
++ m_DataLayout == rhs.m_DataLayout;
++ }
++
++ /// Target width value.
++ uint32_t m_TargetWidth;
++ /// Target height value.
++ uint32_t m_TargetHeight;
++ /// The index of the dimension to reverse.
++ int32_t m_Axis;
++ /// The data layout to be used (NCHW, NHWC).
++ DataLayout m_DataLayout;
++};
++
+ /// A ReduceSumDescriptor for the REDUCE SUM.
+ struct ReduceSumDescriptor
+ {
+diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
+index cef85d5..38e74cd 100644
+--- a/include/armnn/DescriptorsFwd.hpp
++++ b/include/armnn/DescriptorsFwd.hpp
+@@ -33,6 +33,7 @@ struct QLstmDescriptor;
+ struct ReshapeDescriptor;
+ struct ResizeBilinearDescriptor;
+ struct ResizeDescriptor;
++struct ReverseV2Descriptor;
+ struct ReduceSumDescriptor;
+ struct SoftmaxDescriptor;
+ struct SpaceToBatchNdDescriptor;
+diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
+index 0701790..670c856 100644
+--- a/include/armnn/ILayerSupport.hpp
++++ b/include/armnn/ILayerSupport.hpp
+@@ -392,6 +392,11 @@ public:
+ const TransposeDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
++ virtual bool IsReverseV2Supported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReverseV2Descriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
++
+ virtual bool IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
+index cd57275..a40dbae 100644
+--- a/include/armnn/ILayerVisitor.hpp
++++ b/include/armnn/ILayerVisitor.hpp
+@@ -403,6 +403,14 @@ public:
+ const ResizeDescriptor& resizeDescriptor,
+ const char* name = nullptr) = 0;
+
++ /// Function that a reversev2 layer should call back to when its Accept(ILayerVisitor&) function is invoked.
++ /// @param layer - pointer to the layer which is calling back to this visit function.
++ /// @param reversev2Descriptor - Parameters for the reversev2 operation.
++ /// @param name - Optional name for the layer.
++ virtual void VisitReverseV2Layer(const IConnectableLayer* layer,
++ const ReverseV2Descriptor& reversev2Descriptor,
++ const char* name = nullptr) = 0;
++
+ /// Function that a reduce_sum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
+ /// @param layer - pointer to the layer which is calling back to this visit function.
+ /// @param ReduceSumDescriptor - Parameters for the reduce max operation.
+diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
+index 79ad686..6678a1c 100644
+--- a/include/armnn/INetwork.hpp
++++ b/include/armnn/INetwork.hpp
+@@ -360,6 +360,13 @@ public:
+ virtual IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
+ const char* name = nullptr) = 0;
+
++ /// Adds a reversev2 layer to the network.
++ /// @param reversev2Descriptor - Parameters for the reversev2 operation.
++ /// @param name - Optional name for the layer.
++ /// @return - Interface for configuring the layer.
++ virtual IConnectableLayer* AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor,
++ const char* name = nullptr) = 0;
++
+ /// Adds a reducemax layer to the network.
+ /// @param ReduceSumDescriptor - Parameters for the reducemax operation.
+ /// @param name - Optional name for the layer.
+diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp
+index 209ef2c..80d4dfb 100644
+--- a/include/armnn/LayerVisitorBase.hpp
++++ b/include/armnn/LayerVisitorBase.hpp
+@@ -204,6 +204,10 @@ public:
+ const ResizeDescriptor&,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+
++ void VisitReverseV2Layer(const IConnectableLayer*,
++ const ReverseV2Descriptor&,
++ const char*) override { DefaultPolicy::Apply(__func__); }
++
+ void VisitReduceSumLayer(const IConnectableLayer*,
+ const ReduceSumDescriptor&,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
+index 5f5ee01..e523d52 100644
+--- a/src/armnn/InternalTypes.hpp
++++ b/src/armnn/InternalTypes.hpp
+@@ -72,6 +72,7 @@
+ X(Switch) \
+ X(Transpose) \
+ X(TransposeConvolution2d) \
++ X(ReverseV2) \
+ X(ReduceSum)
+
+ /// When adding a new layer, adapt also the LastLayer enum value in the
+diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
+index 5092828..7ac517c 100644
+--- a/src/armnn/LayersFwd.hpp
++++ b/src/armnn/LayersFwd.hpp
+@@ -54,6 +54,7 @@
+ #include "layers/QuantizedLstmLayer.hpp"
+ #include "layers/ReshapeLayer.hpp"
+ #include "layers/ResizeLayer.hpp"
++#include "layers/ReverseV2Layer.hpp"
+ #include "layers/ReduceSumLayer.hpp"
+ #include "layers/SliceLayer.hpp"
+ #include "layers/SoftmaxLayer.hpp"
+@@ -143,6 +144,7 @@ DECLARE_LAYER(QLstm)
+ DECLARE_LAYER(QuantizedLstm)
+ DECLARE_LAYER(Reshape)
+ DECLARE_LAYER(Resize)
++DECLARE_LAYER(ReverseV2)
+ DECLARE_LAYER(ReduceSum)
+ DECLARE_LAYER(Slice)
+ DECLARE_LAYER(Softmax)
+diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
+index 335e104..bc6738e 100644
+--- a/src/armnn/Network.cpp
++++ b/src/armnn/Network.cpp
+@@ -1472,6 +1472,12 @@ resizeDescriptor, const char* name)
+ return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name);
+ }
+
++IConnectableLayer* Network::AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor,
++ const char* name)
++{
++ return m_Graph->AddLayer<ReverseV2Layer>(reversev2Descriptor, name);
++}
++
+ IConnectableLayer* Network::AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor,
+ const char* name)
+ {
+diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
+index 6c767f3..95d235e 100644
+--- a/src/armnn/Network.hpp
++++ b/src/armnn/Network.hpp
+@@ -160,6 +160,9 @@ public:
+ IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor,
+ const char* name = nullptr) override;
+
++ IConnectableLayer* AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor,
++ const char* name = nullptr) override;
++
+ IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor,
+ const char* name = nullptr) override;
+
+diff --git a/src/armnn/layers/ReverseV2Layer.cpp b/src/armnn/layers/ReverseV2Layer.cpp
+new file mode 100644
+index 0000000..0921a3d
+--- /dev/null
++++ b/src/armnn/layers/ReverseV2Layer.cpp
+@@ -0,0 +1,75 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReverseV2Layer.hpp"
++#include "LayerCloneBase.hpp"
++
++#include <armnn/TypesUtils.hpp>
++
++#include <armnnUtils/DataLayoutIndexed.hpp>
++
++#include <backendsCommon/WorkloadData.hpp>
++#include <backendsCommon/WorkloadFactory.hpp>
++
++using namespace armnnUtils;
++
++namespace armnn
++{
++
++ReverseV2Layer::ReverseV2Layer(const ReverseV2Descriptor& param, const char* name)
++ : LayerWithParameters(1, 1, LayerType::ReverseV2, param, name)
++{
++}
++
++std::unique_ptr<IWorkload> ReverseV2Layer::CreateWorkload(const IWorkloadFactory& factory) const
++{
++ ReverseV2QueueDescriptor descriptor;
++ return factory.CreateReverseV2(descriptor, PrepInfoAndDesc(descriptor));
++}
++
++ReverseV2Layer* ReverseV2Layer::Clone(Graph& graph) const
++{
++ return CloneBase<ReverseV2Layer>(graph, m_Param, GetName());
++}
++
++std::vector<TensorShape> ReverseV2Layer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
++{
++ ARMNN_ASSERT(inputShapes.size() == 1);
++
++ const TensorShape& inputShape = inputShapes[0];
++ const DataLayoutIndexed dimensionIndices = m_Param.m_DataLayout;
++
++ unsigned int outWidth = m_Param.m_TargetWidth;
++ unsigned int outHeight = m_Param.m_TargetHeight;
++ unsigned int outChannels = inputShape[dimensionIndices.GetChannelsIndex()];
++ unsigned int outBatch = inputShape[0];
++
++ TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ?
++ TensorShape( { outBatch, outHeight, outWidth, outChannels } ) :
++ TensorShape( { outBatch, outChannels, outHeight, outWidth });
++
++ return std::vector<TensorShape>({ tensorShape });
++}
++
++void ReverseV2Layer::ValidateTensorShapesFromInputs()
++{
++ VerifyLayerConnections(1, CHECK_LOCATION());
++
++ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
++
++ ARMNN_ASSERT(inferredShapes.size() == 1);
++
++ ConditionalThrowIfNotEqual<LayerValidationException>(
++ "ReverseV2Layer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
++ GetOutputSlot(0).GetTensorInfo().GetShape(),
++ inferredShapes[0]);
++}
++
++void ReverseV2Layer::Accept(ILayerVisitor& visitor) const
++{
++ visitor.VisitReverseV2Layer(this, GetParameters(), GetName());
++}
++
++} // namespace armnn
+diff --git a/src/armnn/layers/ReverseV2Layer.hpp b/src/armnn/layers/ReverseV2Layer.hpp
+new file mode 100644
+index 0000000..65acdfb
+--- /dev/null
++++ b/src/armnn/layers/ReverseV2Layer.hpp
+@@ -0,0 +1,48 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++#pragma once
++
++#include "LayerWithParameters.hpp"
++
++namespace armnn
++{
++
++/// This layer represents a reversev2 operation.
++class ReverseV2Layer : public LayerWithParameters<ReverseV2Descriptor>
++{
++public:
++ /// Makes a workload for the ReverseV2 type.
++ /// @param [in] graph The graph where this layer can be found.
++ /// @param [in] factory The workload factory which will create the workload.
++ /// @return A pointer to the created workload, or nullptr if not created.
++ virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
++
++ /// Creates a dynamically-allocated copy of this layer.
++ /// @param [in] graph The graph into which this layer is being cloned.
++ ReverseV2Layer* Clone(Graph& graph) const override;
++
++ /// Check if the input tensor shape(s)
++ /// will lead to a valid configuration of @ref ReverseV2Layer.
++ void ValidateTensorShapesFromInputs() override;
++
++ /// By default returns inputShapes if the number of inputs are equal to number of outputs,
++ /// otherwise infers the output shapes from given input shapes and layer properties.
++ /// @param [in] inputShapes The input shapes layer has.
++ /// @return A vector to the inferred output shape.
++ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
++
++ void Accept(ILayerVisitor& visitor) const override;
++
++protected:
++ /// Constructor to create a ReverseV2Layer.
++ /// @param [in] param ReverseV2Descriptor to configure the reversev2 operation.
++ /// @param [in] name Optional name for the layer.
++ ReverseV2Layer(const ReverseV2Descriptor& param, const char* name);
++
++ /// Default destructor
++ ~ReverseV2Layer() = default;
++};
++
++} // namespace armnn
+diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
+index e5400dc..3da7288 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.cpp
++++ b/src/armnnTfLiteParser/TfLiteParser.cpp
+@@ -531,6 +531,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o
+ m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE] = &TfLiteParser::ParseTranspose;
+ m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV] = &TfLiteParser::ParseTransposeConv;
+ m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack;
++ m_ParserFunctions[tflite::BuiltinOperator_REVERSE_V2] = &TfLiteParser::ParseReverse_v2;
+ m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax;
+ m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum;
+
+@@ -2732,6 +2733,46 @@ void TfLiteParser::ParseSplitV(size_t subgraphIndex, size_t operatorIndex)
+ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
+ }
+
++void TfLiteParser::ParseReverse_v2(size_t subgraphIndex, size_t operatorIndex)
++{
++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(inputs.size(), 2);
++
++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(outputs.size(), 1);
++
++ auto layerName = boost::str(boost::format("Reverse_v2:%1%:%2%") % subgraphIndex % operatorIndex);
++
++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]);
++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]);
++
++ TensorShape shape = sizeTensorInfo0.GetShape();
++
++ // Get the constant axis value from the model and set it on the descriptor.
++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer);
++
++ ReverseV2Descriptor desc;
++ desc.m_Axis = axisBufferPtr->data.data()[0];
++ desc.m_TargetHeight = shape[1];
++ desc.m_TargetWidth = shape[2];
++ desc.m_DataLayout = armnn::DataLayout::NHWC;
++
++ // Register a new ReverseV2 layer with ARMNN's in-memory network.
++ IConnectableLayer *layer = m_Network->AddReverseV2Layer(desc, layerName.c_str());
++
++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
++
++ // Register input tensor to the layer.
++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
++
++ // Register output tensor to the layer.
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
++}
++
+ void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex)
+ {
+ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
+diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
+index 13d1cb4..7970559 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.hpp
++++ b/src/armnnTfLiteParser/TfLiteParser.hpp
+@@ -133,6 +133,7 @@ private:
+ void ParseTranspose(size_t subgraphIndex, size_t operatorIndex);
+ void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex);
+ void ParseUnpack(size_t subgraphIndex, size_t operatorIndex);
++ void ParseReverse_v2(size_t subgraphIndex, size_t operatorIndex);
+ void ParseArgMax(size_t subgraphIndex, size_t operatorIndex);
+ void ParseSum(size_t subgraphIndex, size_t operatorIndex);
+
+diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
+index 245b165..0c1c1e2 100644
+--- a/src/backends/backendsCommon/LayerSupportBase.cpp
++++ b/src/backends/backendsCommon/LayerSupportBase.cpp
+@@ -615,6 +615,14 @@ bool LayerSupportBase::IsTransposeSupported(const TensorInfo& /*input*/,
+ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+ }
+
++bool LayerSupportBase::IsReverseV2Supported(const TensorInfo& /*input*/,
++ const TensorInfo& /*output*/,
++ const ReverseV2Descriptor& /*descriptor*/,
++ Optional<std::string&> reasonIfUnsupported) const
++{
++ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
++}
++
+ bool LayerSupportBase::IsReduceSumSupported(const TensorInfo& /*input*/,
+ const TensorInfo& /*output*/,
+ const ReduceSumDescriptor& /*descriptor*/,
+diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
+index 9b39f8f..6f1e6e0 100644
+--- a/src/backends/backendsCommon/LayerSupportBase.hpp
++++ b/src/backends/backendsCommon/LayerSupportBase.hpp
+@@ -377,6 +377,11 @@ public:
+ const TransposeDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
++ bool IsReverseV2Supported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReverseV2Descriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
++
+ bool IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
+index 7455ab5..dcbec11 100644
+--- a/src/backends/backendsCommon/WorkloadData.cpp
++++ b/src/backends/backendsCommon/WorkloadData.cpp
+@@ -3483,6 +3483,55 @@ void ElementwiseUnaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo)
+ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
+ }
+
++void ReverseV2QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
++{
++ const std::string descriptorName{"ReverseV2QueueDescriptor"};
++
++ ValidateNumInputs(workloadInfo, descriptorName, 1);
++ ValidateNumOutputs(workloadInfo, descriptorName, 1);
++
++ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
++ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
++
++ ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input");
++ ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output");
++
++ std::vector<DataType> supportedTypes =
++ {
++ DataType::BFloat16,
++ DataType::Float16,
++ DataType::Float32,
++ DataType::QAsymmS8,
++ DataType::QAsymmU8,
++ DataType::QSymmS16
++ };
++
++ ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName);
++ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
++
++ // ReverseV2 does not change the tensor shape, so batch and channel counts must match between input and output.
++ const unsigned int inputBatchSize = inputTensorInfo.GetShape()[0];
++ const unsigned int outputBatchSize = outputTensorInfo.GetShape()[0];
++ if (inputBatchSize != outputBatchSize)
++ {
++ throw InvalidArgumentException(
++ boost::str(boost::format("%1%: Input batch size (%2%) "
++ "does not match output batch size (%3%)") %
++ descriptorName % inputBatchSize % outputBatchSize));
++ }
++
++ DataLayoutIndexed dimensionIndices(m_Parameters.m_DataLayout);
++ const unsigned int inputChannelCount = inputTensorInfo.GetShape()[dimensionIndices.GetChannelsIndex()];
++ const unsigned int outputChannelCount = outputTensorInfo.GetShape()[dimensionIndices.GetChannelsIndex()];
++ if (inputChannelCount != outputChannelCount)
++ {
++ throw InvalidArgumentException(
++ boost::str(boost::format("%1%: Input channel count (%2%) "
++ "does not match output channel count (%3%)") %
++ descriptorName % inputChannelCount % outputChannelCount));
++ }
++}
++
+ void ReduceSumQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+ {
+ const std::string descriptorName{"ReduceSumQueueDescriptor"};
+diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
+index 6f203b5..0cbe8aa 100644
+--- a/src/backends/backendsCommon/WorkloadData.hpp
++++ b/src/backends/backendsCommon/WorkloadData.hpp
+@@ -634,6 +634,11 @@ struct ElementwiseUnaryQueueDescriptor : QueueDescriptorWithParameters<Elementwi
+ void Validate(const WorkloadInfo& workloadInfo) const;
+ };
+
++struct ReverseV2QueueDescriptor : QueueDescriptorWithParameters<ReverseV2Descriptor>
++{
++ void Validate(const WorkloadInfo& workloadInfo) const;
++};
++
+ struct ReduceSumQueueDescriptor : QueueDescriptorWithParameters<ReduceSumDescriptor>
+ {
+ void Validate(const WorkloadInfo& workloadInfo) const;
+diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
+index b7195f7..31e6bfd 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.cpp
++++ b/src/backends/backendsCommon/WorkloadFactory.cpp
+@@ -1178,6 +1178,18 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+
+ break;
+ }
++ case LayerType::ReverseV2:
++ {
++ auto cLayer = PolymorphicDowncast<const ReverseV2Layer*>(&layer);
++ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
++ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
++
++ result = layerSupportObject->IsReverseV2Supported(OverrideDataType(input, dataType),
++ OverrideDataType(output, dataType),
++ cLayer->GetParameters(),
++ reason);
++ break;
++ }
+ case LayerType::ReduceSum:
+ {
+ auto cLayer = PolymorphicDowncast<const ReduceSumLayer*>(&layer);
+@@ -1596,6 +1608,12 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateTransposeConvolution2d(
+ return std::unique_ptr<IWorkload>();
+ }
+
++std::unique_ptr<IWorkload> IWorkloadFactory::CreateReverseV2(const ReverseV2QueueDescriptor& /*descriptor*/,
++ const WorkloadInfo& /*info*/) const
++{
++ return std::unique_ptr<IWorkload>();
++}
++
+ std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& /*descriptor*/,
+ const WorkloadInfo& /*info*/) const
+ {
+diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
+index 0d98c92..740da18 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.hpp
++++ b/src/backends/backendsCommon/WorkloadFactory.hpp
+@@ -250,6 +250,10 @@ public:
+ virtual std::unique_ptr<IWorkload> CreateTransposeConvolution2d(
+ const TransposeConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
++
++ virtual std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor,
++ const WorkloadInfo& info) const;
++
+ virtual std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
+ };
+diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
+index 333ad4d..1ac947e 100644
+--- a/src/backends/reference/RefLayerSupport.cpp
++++ b/src/backends/reference/RefLayerSupport.cpp
+@@ -2132,6 +2132,35 @@ bool RefLayerSupport::IsTransposeSupported(const TensorInfo& input,
+ return supported;
+ }
+
++bool RefLayerSupport::IsReverseV2Supported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReverseV2Descriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported) const
++{
++ IgnoreUnused(descriptor);
++ bool supported = true;
++ std::array<DataType,6> supportedTypes =
++ {
++ DataType::BFloat16,
++ DataType::Float32,
++ DataType::Float16,
++ DataType::QAsymmS8,
++ DataType::QAsymmU8,
++ DataType::QSymmS16
++ };
++
++ supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
++ "Reference ReverseV2: input type not supported");
++
++ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
++ "Reference ReverseV2: output type not supported");
++
++ supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
++ "Reference ReverseV2: input and output types not matching");
++
++ return supported;
++}
++
+ bool RefLayerSupport::IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
+index 766ddfa..cdc2adb 100644
+--- a/src/backends/reference/RefLayerSupport.hpp
++++ b/src/backends/reference/RefLayerSupport.hpp
+@@ -342,6 +342,11 @@ public:
+ const TransposeDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
++ bool IsReverseV2Supported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReverseV2Descriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
++
+ bool IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
+index 9602df5..7d1e810 100644
+--- a/src/backends/reference/RefWorkloadFactory.cpp
++++ b/src/backends/reference/RefWorkloadFactory.cpp
+@@ -632,6 +632,12 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d(
+ return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info);
+ }
+
++std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReverseV2(const ReverseV2QueueDescriptor& descriptor,
++ const WorkloadInfo& info) const
++{
++ return std::make_unique<RefReverseV2Workload>(descriptor, info);
++}
++
+ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+ {
+diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
+index 93cab9a..14df6b8 100644
+--- a/src/backends/reference/RefWorkloadFactory.hpp
++++ b/src/backends/reference/RefWorkloadFactory.hpp
+@@ -250,6 +250,9 @@ public:
+ std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
++ std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor,
++ const WorkloadInfo& info) const override;
++
+ std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
+diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
+index d5eceff..f68a673 100644
+--- a/src/backends/reference/workloads/CMakeLists.txt
++++ b/src/backends/reference/workloads/CMakeLists.txt
+@@ -51,6 +51,8 @@ list(APPEND armnnRefBackendWorkloads_sources
+ Pad.hpp
+ ReduceSum.cpp
+ ReduceSum.hpp
++ ReverseV2.cpp
++ ReverseV2.hpp
+ Pooling2d.cpp
+ Pooling2d.hpp
+ PreluImpl.cpp
+@@ -174,6 +176,8 @@ list(APPEND armnnRefBackendWorkloads_sources
+ TensorBufferArrayView.hpp
+ TransposeConvolution2d.cpp
+ TransposeConvolution2d.hpp
++ RefReverseV2Workload.cpp
++ RefReverseV2Workload.hpp
+ RefReduceSumWorkload.cpp
+ RefReduceSumWorkload.hpp
+ )
+diff --git a/src/backends/reference/workloads/RefReverseV2Workload.cpp b/src/backends/reference/workloads/RefReverseV2Workload.cpp
+new file mode 100644
+index 0000000..73ceba4
+--- /dev/null
++++ b/src/backends/reference/workloads/RefReverseV2Workload.cpp
+@@ -0,0 +1,35 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "RefReverseV2Workload.hpp"
++
++#include "ReverseV2.hpp"
++#include "RefWorkloadUtils.hpp"
++#include "BaseIterator.hpp"
++#include "Profiling.hpp"
++
++#include "BaseIterator.hpp"
++#include "Decoders.hpp"
++#include "Encoders.hpp"
++
++namespace armnn
++{
++
++void RefReverseV2Workload::Execute() const
++{
++ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReverseV2Workload_Execute");
++
++ const TensorInfo& inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
++ const TensorInfo& outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
++
++ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map());
++ Decoder<float> &decoder = *decoderPtr;
++
++ float *output = GetOutputTensorData<float>(0, m_Data);
++
++ ReverseV2(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Axis);
++}
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/RefReverseV2Workload.hpp b/src/backends/reference/workloads/RefReverseV2Workload.hpp
+new file mode 100644
+index 0000000..3c71dfa
+--- /dev/null
++++ b/src/backends/reference/workloads/RefReverseV2Workload.hpp
+@@ -0,0 +1,21 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include <backendsCommon/Workload.hpp>
++#include <backendsCommon/WorkloadData.hpp>
++
++namespace armnn
++{
++
++class RefReverseV2Workload : public BaseWorkload<ReverseV2QueueDescriptor>
++{
++public:
++ using BaseWorkload<ReverseV2QueueDescriptor>::BaseWorkload;
++ virtual void Execute() const override;
++};
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
+index c80ed43..9427d5c 100644
+--- a/src/backends/reference/workloads/RefWorkloads.hpp
++++ b/src/backends/reference/workloads/RefWorkloads.hpp
+@@ -67,4 +67,5 @@
+ #include "Softmax.hpp"
+ #include "Splitter.hpp"
+ #include "TensorBufferArrayView.hpp"
++#include "RefReverseV2Workload.hpp"
+ #include "RefReduceSumWorkload.hpp"
+diff --git a/src/backends/reference/workloads/ReverseV2.cpp b/src/backends/reference/workloads/ReverseV2.cpp
+new file mode 100644
+index 0000000..1bfd350
+--- /dev/null
++++ b/src/backends/reference/workloads/ReverseV2.cpp
+@@ -0,0 +1,80 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReverseV2.hpp"
++
++#include <armnnUtils/TensorUtils.hpp>
++
++#include <boost/numeric/conversion/cast.hpp>
++#include <algorithm>
++#include <iostream>
++
++namespace armnn
++{
++
++void ReverseV2(Decoder<float>& in, float *out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, int32_t axis)
++{
++ IgnoreUnused(outputTensorInfo);
++
++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
++ const unsigned int batchSize = inputTensorInfo.GetShape()[0];
++ unsigned int height = inputTensorInfo.GetShape()[1];
++ unsigned int width = inputTensorInfo.GetShape()[2];
++ unsigned int channel = inputTensorInfo.GetShape()[3];
++
++ // TODO: Integrate the four if-conditions below into one.
++ if (uAxis == 3) {
++ for (unsigned int b = 0; b < batchSize; ++b) {
++ for (unsigned int y = 0; y < height; ++y) {
++ for (unsigned int x = 0; x < width; ++x) {
++ for (unsigned int c = 0; c < channel; ++c) {
++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c];
++ float in_val = in.Get();
++ out[(b * height * width * channel) + (y * width * channel) + (x * channel) + (channel - 1 - c)] = in_val;
++ }
++ }
++ }
++ }
++ } else if (uAxis == 2) {
++ for (unsigned int b = 0; b < batchSize; ++b) {
++ for (unsigned int y = 0; y < height; ++y) {
++ for (unsigned int c = 0; c < channel; ++c) {
++ for (unsigned int x = 0; x < width; ++x) {
++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c];
++ float in_val = in.Get();
++ out[(b * height * width * channel) + (y * width * channel) + ((width - 1 - x) * channel) + c] = in_val;
++ }
++ }
++ }
++ }
++ } else if (uAxis == 1) {
++ for (unsigned int b = 0; b < batchSize; ++b) {
++ for (unsigned int y = 0; y < height; ++y) {
++ for (unsigned int x = 0; x < width; ++x) {
++ for (unsigned int c = 0; c < channel; ++c) {
++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c];
++ float in_val = in.Get();
++ out[(b * height * width * channel) + ((height - 1 - y) * width * channel) + (x * channel) + c] = in_val;
++ }
++ }
++ }
++ }
++ } else if (uAxis == 0) {
++ for (unsigned int b = 0; b < batchSize; ++b) {
++ for (unsigned int y = 0; y < height; ++y) {
++ for (unsigned int x = 0; x < width; ++x) {
++ for (unsigned int c = 0; c < channel; ++c) {
++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c];
++ float in_val = in.Get();
++ out[((batchSize - 1 - b) * height * width * channel) + (y * width * channel) + (x * channel) + c] = in_val;
++ }
++ }
++ }
++ }
++ }
++}
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/ReverseV2.hpp b/src/backends/reference/workloads/ReverseV2.hpp
+new file mode 100644
+index 0000000..3957959
+--- /dev/null
++++ b/src/backends/reference/workloads/ReverseV2.hpp
+@@ -0,0 +1,20 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include "armnn/Tensor.hpp"
++#include "armnn/Descriptors.hpp"
++
++#include "Decoders.hpp"
++
++namespace armnn
++{
++
++void ReverseV2(Decoder<float>& in, float* out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, int32_t axis);
++
++} //namespace armnn
++
+--
+2.7.4
+
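The ReverseV2 reference workload added above reduces to mirroring the index along one axis of a 4D tensor (the bare in[...] statements in ReverseV2.cpp appear to reposition the decoder, and in.Get() then reads the value at that position). Below is a minimal standalone sketch of that behaviour, assuming a flat row-major layout; the names are illustrative only and this is not ArmNN code.

    #include <array>
    #include <cassert>
    #include <vector>

    // Reverse a row-major 4D tensor along a single axis by mirroring that index.
    std::vector<float> ReverseV2Naive(const std::vector<float>& in,
                                      std::array<unsigned, 4> shape, unsigned axis)
    {
        std::vector<float> out(in.size());
        auto index = [&](unsigned b, unsigned y, unsigned x, unsigned c)
        { return ((b * shape[1] + y) * shape[2] + x) * shape[3] + c; };

        for (unsigned b = 0; b < shape[0]; ++b)
            for (unsigned y = 0; y < shape[1]; ++y)
                for (unsigned x = 0; x < shape[2]; ++x)
                    for (unsigned c = 0; c < shape[3]; ++c)
                    {
                        std::array<unsigned, 4> o{b, y, x, c};
                        o[axis] = shape[axis] - 1 - o[axis]; // mirror the index on the chosen axis
                        out[index(o[0], o[1], o[2], o[3])] = in[index(b, y, x, c)];
                    }
        return out;
    }

    int main()
    {
        // Mirrors ReverseV2SimpleTest below: shape {1,1,1,5}, axis 3.
        std::vector<float> in{5.0f, 2.0f, 8.0f, 10.0f, 9.0f};
        auto out = ReverseV2Naive(in, {1, 1, 1, 5}, 3);
        assert((out == std::vector<float>{9.0f, 10.0f, 8.0f, 2.0f, 5.0f}));
    }
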
diff --git a/packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch b/packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch
new file mode 100644
index 000000000..ed8a63923
--- /dev/null
+++ b/packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch
@@ -0,0 +1,282 @@
+From ba1bc1ba932624309210e838fcf418dcbf06ea62 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Tue, 8 Sep 2020 10:47:24 +0900
+Subject: [PATCH 05/10] backends/test: Add ReverseV2 op test cases
+
+Change-Id: I46306668501f160fef00e6e01547d285c75c7125
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ src/backends/backendsCommon/test/CMakeLists.txt | 2 +
+ src/backends/backendsCommon/test/LayerTests.hpp | 1 +
+ .../test/layerTests/ReverseV2TestImpl.cpp | 186 +++++++++++++++++++++
+ .../test/layerTests/ReverseV2TestImpl.hpp | 25 +++
+ src/backends/reference/test/RefLayerTests.cpp | 5 +
+ 5 files changed, 219 insertions(+)
+ create mode 100644 src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp
+ create mode 100644 src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp
+
+diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
+index 951a46d..e548b2b 100644
+--- a/src/backends/backendsCommon/test/CMakeLists.txt
++++ b/src/backends/backendsCommon/test/CMakeLists.txt
+@@ -133,6 +133,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
+ layerTests/ReshapeTestImpl.hpp
+ layerTests/ResizeTestImpl.cpp
+ layerTests/ResizeTestImpl.hpp
++ layerTests/ReverseV2TestImpl.cpp
++ layerTests/ReverseV2TestImpl.hpp
+ layerTests/RsqrtTestImpl.cpp
+ layerTests/RsqrtTestImpl.hpp
+ layerTests/SliceTestImpl.cpp
+diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
+index 25f411f..b3cfe4a 100644
+--- a/src/backends/backendsCommon/test/LayerTests.hpp
++++ b/src/backends/backendsCommon/test/LayerTests.hpp
+@@ -45,6 +45,7 @@
+ #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
++#include <backendsCommon/test/layerTests/ReverseV2TestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp>
+diff --git a/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp
+new file mode 100644
+index 0000000..c0134a4
+--- /dev/null
++++ b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp
+@@ -0,0 +1,186 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReverseV2TestImpl.hpp"
++
++#include <backendsCommon/test/DataTypeUtils.hpp>
++#include <backendsCommon/test/TensorCopyUtils.hpp>
++#include <backendsCommon/test/WorkloadTestUtils.hpp>
++
++#include <test/TensorHelpers.hpp>
++
++namespace
++{
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReverseV2TestCommon(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
++ const armnn::TensorInfo inputTensorInfo,
++ const armnn::TensorInfo outputTensorInfo,
++ const std::vector<float>& inputData,
++ const std::vector<float>& outputData,
++ int axis = 3)
++{
++ IgnoreUnused(memoryManager);
++ auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
++
++ LayerTestResult<float, 4> result(outputTensorInfo);
++ result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
++
++ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
++ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
++
++ armnn::ReverseV2QueueDescriptor descriptor;
++ unsigned int updated_idx = static_cast<uint32_t>(axis);
++ if (axis < 0) {
++ updated_idx = static_cast<uint32_t>(static_cast<int32_t>(inputTensorInfo.GetNumDimensions()) + axis);
++ }
++
++ descriptor.m_Parameters.m_Axis = updated_idx;
++ descriptor.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW;
++ armnn::WorkloadInfo info;
++
++ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
++ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
++
++ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReverseV2(descriptor, info);
++
++ inputHandle->Allocate();
++ outputHandle->Allocate();
++
++ CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin());
++
++ workload->Execute();
++
++ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
++
++ return result;
++}
++
++} // namespace
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReverseV2SimpleTest(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 1, 1, 5 };
++ const armnn::TensorShape outputShape{ 1, 1, 1, 5};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f });
++ std::vector<float> outputValues({ 9.0f, 10.0f, 8.0f, 2.0f, 5.0f });
++
++ return ReverseV2TestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, -1);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReverseV2MultiChannel_1Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
++ const armnn::TensorShape outputShape{ 1, 3, 2, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++ std::vector<float> outputValues({ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f});
++
++ return ReverseV2TestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 1);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReverseV2MultiChannel_2Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
++ const armnn::TensorShape outputShape{ 1, 3, 2, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++ std::vector<float> outputValues({ 5.0f, 6.0f, 7.0f, 8.0f,
++ 1.0f, 2.0f, 3.0f, 4.0f,
++
++ 50.0f, 60.0f, 70.0f, 80.0f,
++ 10.0f, 20.0f, 30.0f, 40.0f,
++
++ 500.0f, 600.0f, 700.0f, 800.0f,
++ 100.0f, 200.0f, 300.0f, 400.0f });
++
++ return ReverseV2TestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 2);
++}
++
++// Explicit template specializations
++
++template LayerTestResult<float, 4>
++ReverseV2SimpleTest<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReverseV2MultiChannel_1Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReverseV2MultiChannel_2Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+diff --git a/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp
+new file mode 100644
+index 0000000..4eb93cf
+--- /dev/null
++++ b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp
+@@ -0,0 +1,25 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include "LayerTestResult.hpp"
++
++#include <ResolveType.hpp>
++
++#include <armnn/backends/IBackendInternal.hpp>
++#include <backendsCommon/WorkloadFactory.hpp>
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReverseV2SimpleTest(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReverseV2MultiChannel_1Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReverseV2MultiChannel_2Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
+index 9461e2a..a5b1f9d 100644
+--- a/src/backends/reference/test/RefLayerTests.cpp
++++ b/src/backends/reference/test/RefLayerTests.cpp
+@@ -1987,4 +1987,9 @@ ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannel2Float32, ReduceSumMultiChannel_2Test<
+ ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannelFloat32, ReduceSumMultiBatchAndChannelTest<DataType::Float32>)
+ ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannel_2Float32, ReduceSumMultiBatchAndChannel_2Test<DataType::Float32>)
+
++// ReverseV2
++ARMNN_AUTO_TEST_CASE(ReverseV2Float32, ReverseV2SimpleTest<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReverseV2MultiChannelFloat32, ReverseV2MultiChannel_1Test<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReverseV2MultiChannel2Float32, ReverseV2MultiChannel_2Test<DataType::Float32>)
++
+ BOOST_AUTO_TEST_SUITE_END()
+--
+2.7.4
+
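Patch 0006 below adds a ReduceMax layer whose descriptor carries a keepdims-style flag. As a rough standalone sketch of the intended semantics (maximum over one axis, with the reduced dimension kept at length 1): the flat row-major layout and names here are assumptions for illustration only, not ArmNN code.

    #include <algorithm>
    #include <array>
    #include <cassert>
    #include <limits>
    #include <vector>

    // Take the maximum of a row-major 4D tensor along 'axis', keeping that dimension at length 1.
    std::vector<float> ReduceMaxNaive(const std::vector<float>& in,
                                      std::array<unsigned, 4> shape, unsigned axis)
    {
        std::array<unsigned, 4> outShape = shape;
        outShape[axis] = 1;
        std::vector<float> out(outShape[0] * outShape[1] * outShape[2] * outShape[3],
                               std::numeric_limits<float>::lowest());

        auto index = [](const std::array<unsigned, 4>& s,
                        unsigned n, unsigned c, unsigned h, unsigned w)
        { return ((n * s[1] + c) * s[2] + h) * s[3] + w; };

        for (unsigned n = 0; n < shape[0]; ++n)
            for (unsigned c = 0; c < shape[1]; ++c)
                for (unsigned h = 0; h < shape[2]; ++h)
                    for (unsigned w = 0; w < shape[3]; ++w)
                    {
                        std::array<unsigned, 4> o{n, c, h, w};
                        o[axis] = 0; // all elements along 'axis' compete for slot 0
                        float& slot = out[index(outShape, o[0], o[1], o[2], o[3])];
                        slot = std::max(slot, in[index(shape, n, c, h, w)]);
                    }
        return out;
    }

    int main()
    {
        // Max over the last axis of {1,1,1,5}: {5,2,8,10,9} -> {10}.
        std::vector<float> in{5.0f, 2.0f, 8.0f, 10.0f, 9.0f};
        auto out = ReduceMaxNaive(in, {1, 1, 1, 5}, 3);
        assert(out.size() == 1 && out[0] == 10.0f);
    }
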
diff --git a/packaging/0006-backends-reference-Add-ReduceMax-op-support.patch b/packaging/0006-backends-reference-Add-ReduceMax-op-support.patch
new file mode 100644
index 000000000..d099562f2
--- /dev/null
+++ b/packaging/0006-backends-reference-Add-ReduceMax-op-support.patch
@@ -0,0 +1,892 @@
+From caa3fe740a66fe69c89bbd0fb9bd0183fb327f22 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Tue, 8 Sep 2020 11:43:19 +0900
+Subject: [PATCH 06/10] backends/reference: Add ReduceMax op support
+
+Change-Id: I5f8825a94aa0d24ffe5890c6c42088f7cdba4860
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ CMakeLists.txt | 2 +
+ include/armnn/Descriptors.hpp | 32 ++++++++++
+ include/armnn/DescriptorsFwd.hpp | 1 +
+ include/armnn/ILayerSupport.hpp | 5 ++
+ include/armnn/ILayerVisitor.hpp | 8 +++
+ include/armnn/INetwork.hpp | 7 +++
+ include/armnn/LayerVisitorBase.hpp | 4 ++
+ src/armnn/InternalTypes.hpp | 1 +
+ src/armnn/LayersFwd.hpp | 2 +
+ src/armnn/Network.cpp | 6 ++
+ src/armnn/Network.hpp | 3 +
+ src/armnn/layers/ReduceMaxLayer.cpp | 70 +++++++++++++++++++++
+ src/armnn/layers/ReduceMaxLayer.hpp | 48 +++++++++++++++
+ src/armnnTfLiteParser/TfLiteParser.cpp | 41 +++++++++++++
+ src/armnnTfLiteParser/TfLiteParser.hpp | 1 +
+ src/backends/backendsCommon/LayerSupportBase.cpp | 8 +++
+ src/backends/backendsCommon/LayerSupportBase.hpp | 5 ++
+ src/backends/backendsCommon/WorkloadData.cpp | 39 ++++++++++++
+ src/backends/backendsCommon/WorkloadData.hpp | 5 ++
+ src/backends/backendsCommon/WorkloadFactory.cpp | 18 ++++++
+ src/backends/backendsCommon/WorkloadFactory.hpp | 3 +
+ src/backends/reference/RefLayerSupport.cpp | 30 +++++++++
+ src/backends/reference/RefLayerSupport.hpp | 5 ++
+ src/backends/reference/RefWorkloadFactory.cpp | 6 ++
+ src/backends/reference/RefWorkloadFactory.hpp | 3 +
+ src/backends/reference/workloads/CMakeLists.txt | 4 ++
+ src/backends/reference/workloads/ReduceMax.cpp | 71 ++++++++++++++++++++++
+ src/backends/reference/workloads/ReduceMax.hpp | 20 ++++++
+ .../reference/workloads/RefReduceMaxWorkload.cpp | 35 +++++++++++
+ .../reference/workloads/RefReduceMaxWorkload.hpp | 21 +++++++
+ src/backends/reference/workloads/RefWorkloads.hpp | 1 +
+ 31 files changed, 505 insertions(+)
+ create mode 100644 src/armnn/layers/ReduceMaxLayer.cpp
+ create mode 100644 src/armnn/layers/ReduceMaxLayer.hpp
+ create mode 100644 src/backends/reference/workloads/ReduceMax.cpp
+ create mode 100644 src/backends/reference/workloads/ReduceMax.hpp
+ create mode 100644 src/backends/reference/workloads/RefReduceMaxWorkload.cpp
+ create mode 100644 src/backends/reference/workloads/RefReduceMaxWorkload.hpp
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 52c8785..631c76f 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -382,6 +382,8 @@ list(APPEND armnn_sources
+ src/armnn/layers/TransposeLayer.cpp
+ src/armnn/layers/ReverseV2Layer.hpp
+ src/armnn/layers/ReverseV2Layer.cpp
++ src/armnn/layers/ReduceMaxLayer.hpp
++ src/armnn/layers/ReduceMaxLayer.cpp
+ src/armnn/layers/ReduceSumLayer.hpp
+ src/armnn/layers/ReduceSumLayer.cpp
+ src/armnn/BackendRegistry.cpp
+diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
+index c7123f1..b7b4f53 100644
+--- a/include/armnn/Descriptors.hpp
++++ b/include/armnn/Descriptors.hpp
+@@ -1243,6 +1243,38 @@ struct ReverseV2Descriptor
+ DataLayout m_DataLayout;
+ };
+
++/// A ReduceMaxDescriptor for the REDUCE MAX.
++struct ReduceMaxDescriptor
++{
++ ReduceMaxDescriptor()
++ : m_TargetWidth(0)
++ , m_TargetHeight(0)
++ , m_Axis(0)
++ , m_Keepdims(0)
++ , m_DataLayout(DataLayout::NCHW)
++ {}
++
++ bool operator ==(const ReduceMaxDescriptor& rhs) const
++ {
++ return m_TargetWidth == rhs.m_TargetWidth &&
++ m_TargetHeight == rhs.m_TargetHeight &&
++ m_Axis == rhs.m_Axis &&
++ m_Keepdims == rhs.m_Keepdims &&
++ m_DataLayout == rhs.m_DataLayout;
++ }
++
++ /// Target width value.
++ uint32_t m_TargetWidth;
++ /// Target height value.
++ uint32_t m_TargetHeight;
++ /// The indices of the dimensions to reduce.
++ int32_t m_Axis;
++ /// If true, retains reduced dimensions with length 1.
++ uint32_t m_Keepdims;
++ /// The data layout to be used (NCHW, NHWC).
++ DataLayout m_DataLayout;
++};
++
+ /// A ReduceSumDescriptor for the REDUCE SUM.
+ struct ReduceSumDescriptor
+ {
+diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
+index 38e74cd..3b736f9 100644
+--- a/include/armnn/DescriptorsFwd.hpp
++++ b/include/armnn/DescriptorsFwd.hpp
+@@ -34,6 +34,7 @@ struct ReshapeDescriptor;
+ struct ResizeBilinearDescriptor;
+ struct ResizeDescriptor;
+ struct ReverseV2Descriptor;
++struct ReduceMaxDescriptor;
+ struct ReduceSumDescriptor;
+ struct SoftmaxDescriptor;
+ struct SpaceToBatchNdDescriptor;
+diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
+index 670c856..ad33727 100644
+--- a/include/armnn/ILayerSupport.hpp
++++ b/include/armnn/ILayerSupport.hpp
+@@ -397,6 +397,11 @@ public:
+ const ReverseV2Descriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
++ virtual bool IsReduceMaxSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceMaxDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
++
+ virtual bool IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp
+index a40dbae..eb09f22 100644
+--- a/include/armnn/ILayerVisitor.hpp
++++ b/include/armnn/ILayerVisitor.hpp
+@@ -411,6 +411,14 @@ public:
+ const ReverseV2Descriptor& reversev2Descriptor,
+ const char* name = nullptr) = 0;
+
++ /// Function that a reduce_max layer should call back to when its Accept(ILayerVisitor&) function is invoked.
++ /// @param layer - pointer to the layer which is calling back to this visit function.
++ /// @param ReduceMaxDescriptor - Parameters for the reduce max operation.
++ /// @param name - Optional name for the layer.
++ virtual void VisitReduceMaxLayer(const IConnectableLayer* layer,
++ const ReduceMaxDescriptor& reducemaxDescriptor,
++ const char* name = nullptr) = 0;
++
+ /// Function that a reduce_sum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
+ /// @param layer - pointer to the layer which is calling back to this visit function.
+ /// @param ReduceSumDescriptor - Parameters for the reduce max operation.
+diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
+index 6678a1c..b0a3e04 100644
+--- a/include/armnn/INetwork.hpp
++++ b/include/armnn/INetwork.hpp
+@@ -368,6 +368,13 @@ public:
+ const char* name = nullptr) = 0;
+
+ /// Adds a reducemax layer to the network.
++ /// @param ReduceMaxDescriptor - Parameters for the reducemax operation.
++ /// @param name - Optional name for the layer.
++ /// @return - Interface for configuring the layer.
++ virtual IConnectableLayer* AddReduceMaxLayer(const ReduceMaxDescriptor& reducemaxDescriptor,
++ const char* name = nullptr) = 0;
++
++    /// Adds a reducesum layer to the network.
+ /// @param ReduceSumDescriptor - Parameters for the reducemax operation.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
+diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp
+index 80d4dfb..4de379a 100644
+--- a/include/armnn/LayerVisitorBase.hpp
++++ b/include/armnn/LayerVisitorBase.hpp
+@@ -208,6 +208,10 @@ public:
+ const ReverseV2Descriptor&,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+
++ void VisitReduceMaxLayer(const IConnectableLayer*,
++ const ReduceMaxDescriptor&,
++ const char*) override { DefaultPolicy::Apply(__func__); }
++
+ void VisitReduceSumLayer(const IConnectableLayer*,
+ const ReduceSumDescriptor&,
+ const char*) override { DefaultPolicy::Apply(__func__); }
+diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
+index e523d52..5c435de 100644
+--- a/src/armnn/InternalTypes.hpp
++++ b/src/armnn/InternalTypes.hpp
+@@ -73,6 +73,7 @@
+ X(Transpose) \
+ X(TransposeConvolution2d) \
+ X(ReverseV2) \
++ X(ReduceMax) \
+ X(ReduceSum)
+
+ /// When adding a new layer, adapt also the LastLayer enum value in the
+diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
+index 7ac517c..69c133f 100644
+--- a/src/armnn/LayersFwd.hpp
++++ b/src/armnn/LayersFwd.hpp
+@@ -55,6 +55,7 @@
+ #include "layers/ReshapeLayer.hpp"
+ #include "layers/ResizeLayer.hpp"
+ #include "layers/ReverseV2Layer.hpp"
++#include "layers/ReduceMaxLayer.hpp"
+ #include "layers/ReduceSumLayer.hpp"
+ #include "layers/SliceLayer.hpp"
+ #include "layers/SoftmaxLayer.hpp"
+@@ -145,6 +146,7 @@ DECLARE_LAYER(QuantizedLstm)
+ DECLARE_LAYER(Reshape)
+ DECLARE_LAYER(Resize)
+ DECLARE_LAYER(ReverseV2)
++DECLARE_LAYER(ReduceMax)
+ DECLARE_LAYER(ReduceSum)
+ DECLARE_LAYER(Slice)
+ DECLARE_LAYER(Softmax)
+diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
+index bc6738e..6f916f2 100644
+--- a/src/armnn/Network.cpp
++++ b/src/armnn/Network.cpp
+@@ -1478,6 +1478,12 @@ IConnectableLayer* Network::AddReverseV2Layer(const ReverseV2Descriptor& reverse
+ return m_Graph->AddLayer<ReverseV2Layer>(reversev2Descriptor, name);
+ }
+
++IConnectableLayer* Network::AddReduceMaxLayer(const ReduceMaxDescriptor& reducemaxDescriptor,
++ const char* name)
++{
++ return m_Graph->AddLayer<ReduceMaxLayer>(reducemaxDescriptor, name);
++}
++
+ IConnectableLayer* Network::AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor,
+ const char* name)
+ {
+diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
+index 95d235e..18383eb 100644
+--- a/src/armnn/Network.hpp
++++ b/src/armnn/Network.hpp
+@@ -163,6 +163,9 @@ public:
+ IConnectableLayer* AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor,
+ const char* name = nullptr) override;
+
++ IConnectableLayer* AddReduceMaxLayer(const ReduceMaxDescriptor& reducemaxDescriptor,
++ const char* name = nullptr) override;
++
+ IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor,
+ const char* name = nullptr) override;
+
+diff --git a/src/armnn/layers/ReduceMaxLayer.cpp b/src/armnn/layers/ReduceMaxLayer.cpp
+new file mode 100644
+index 0000000..21b08e4
+--- /dev/null
++++ b/src/armnn/layers/ReduceMaxLayer.cpp
+@@ -0,0 +1,70 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReduceMaxLayer.hpp"
++#include "LayerCloneBase.hpp"
++
++#include <armnn/TypesUtils.hpp>
++
++#include <armnnUtils/DataLayoutIndexed.hpp>
++
++#include <backendsCommon/WorkloadData.hpp>
++#include <backendsCommon/WorkloadFactory.hpp>
++
++using namespace armnnUtils;
++
++namespace armnn
++{
++
++ReduceMaxLayer::ReduceMaxLayer(const ReduceMaxDescriptor& param, const char* name)
++ : LayerWithParameters(1, 1, LayerType::ReduceMax, param, name)
++{
++}
++
++std::unique_ptr<IWorkload> ReduceMaxLayer::CreateWorkload(const IWorkloadFactory& factory) const
++{
++ ReduceMaxQueueDescriptor descriptor;
++ return factory.CreateReduceMax(descriptor, PrepInfoAndDesc(descriptor));
++}
++
++ReduceMaxLayer* ReduceMaxLayer::Clone(Graph& graph) const
++{
++ return CloneBase<ReduceMaxLayer>(graph, m_Param, GetName());
++}
++
++std::vector<TensorShape> ReduceMaxLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
++{
++ ARMNN_ASSERT(inputShapes.size() == 1);
++
++ const TensorShape& inputShape = inputShapes[0];
++ const DataLayoutIndexed dimensionIndices = m_Param.m_DataLayout;
++
++ unsigned int outWidth = m_Param.m_TargetWidth;
++ unsigned int outHeight = m_Param.m_TargetHeight;
++ unsigned int outChannels = inputShape[dimensionIndices.GetChannelsIndex()];
++ unsigned int outBatch = inputShape[0];
++
++ TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ?
++ TensorShape( { outBatch, outHeight, outWidth, outChannels } ) :
++ TensorShape( { outBatch, outChannels, outHeight, outWidth });
++
++ return std::vector<TensorShape>({ tensorShape });
++}
++
++void ReduceMaxLayer::ValidateTensorShapesFromInputs()
++{
++ VerifyLayerConnections(1, CHECK_LOCATION());
++
++ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() });
++
++ ARMNN_ASSERT(inferredShapes.size() == 1);
++}
++
++void ReduceMaxLayer::Accept(ILayerVisitor& visitor) const
++{
++ visitor.VisitReduceMaxLayer(this, GetParameters(), GetName());
++}
++
++} // namespace armnn
+diff --git a/src/armnn/layers/ReduceMaxLayer.hpp b/src/armnn/layers/ReduceMaxLayer.hpp
+new file mode 100644
+index 0000000..2f8e01c
+--- /dev/null
++++ b/src/armnn/layers/ReduceMaxLayer.hpp
+@@ -0,0 +1,48 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++#pragma once
++
++#include "LayerWithParameters.hpp"
++
++namespace armnn
++{
++
++/// This layer represents a reducemax operation.
++class ReduceMaxLayer : public LayerWithParameters<ReduceMaxDescriptor>
++{
++public:
++ /// Makes a workload for the ReduceMax type.
++ /// @param [in] graph The graph where this layer can be found.
++ /// @param [in] factory The workload factory which will create the workload.
++ /// @return A pointer to the created workload, or nullptr if not created.
++    virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
++
++ /// Creates a dynamically-allocated copy of this layer.
++ /// @param [in] graph The graph into which this layer is being cloned.
++ ReduceMaxLayer* Clone(Graph& graph) const override;
++
++ /// Check if the input tensor shape(s)
++ /// will lead to a valid configuration of @ref ReduceMaxLayer.
++ void ValidateTensorShapesFromInputs() override;
++
++ /// By default returns inputShapes if the number of inputs are equal to number of outputs,
++ /// otherwise infers the output shapes from given input shapes and layer properties.
++ /// @param [in] inputShapes The input shapes layer has.
++ /// @return A vector to the inferred output shape.
++ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
++
++ void Accept(ILayerVisitor& visitor) const override;
++
++protected:
++ /// Constructor to create a ReduceMaxLayer.
++    /// @param [in] param ReduceMaxDescriptor to configure the reduce max operation.
++ /// @param [in] name Optional name for the layer.
++ ReduceMaxLayer(const ReduceMaxDescriptor& param, const char* name);
++
++ /// Default destructor
++ ~ReduceMaxLayer() = default;
++};
++
++} // namespace armnn
+diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
+index 3da7288..05a15e5 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.cpp
++++ b/src/armnnTfLiteParser/TfLiteParser.cpp
+@@ -533,6 +533,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o
+ m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack;
+ m_ParserFunctions[tflite::BuiltinOperator_REVERSE_V2] = &TfLiteParser::ParseReverse_v2;
+ m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax;
++ m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MAX] = &TfLiteParser::ParseReduceMax;
+ m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum;
+
+ // register supported custom operators
+@@ -2814,6 +2815,46 @@ void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex)
+ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
+ }
+
++void TfLiteParser::ParseReduceMax(size_t subgraphIndex, size_t operatorIndex)
++{
++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(inputs.size(), 2);
++
++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(outputs.size(), 1);
++
++ auto layerName = boost::str(boost::format("ReduceMax:%1%:%2%") % subgraphIndex % operatorIndex);
++
++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]);
++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]);
++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
++
++ TensorShape shape = sizeTensorInfo0.GetShape();
++
++ // Get const axis value from model and set it to descriptor.
++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer);
++
++ ReduceMaxDescriptor desc;
++ desc.m_Axis = axisBufferPtr->data.data()[4];
++ desc.m_TargetHeight = shape[1];
++ desc.m_TargetWidth = shape[2];
++ desc.m_DataLayout = armnn::DataLayout::NHWC;
++
++    // Register a new ReduceMax layer object with ArmNN's in-memory network.
++ IConnectableLayer *layer = m_Network->AddReduceMaxLayer(desc, layerName.c_str());
++
++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
++
++ // Register input tensor to the layer.
++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]});
++
++ // Register output tensor to the layer.
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
++}
++
+ void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex)
+ {
+ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
+diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
+index 7970559..da635ae 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.hpp
++++ b/src/armnnTfLiteParser/TfLiteParser.hpp
+@@ -135,6 +135,7 @@ private:
+ void ParseUnpack(size_t subgraphIndex, size_t operatorIndex);
+ void ParseReverse_v2(size_t subgraphIndex, size_t operatorIndex);
+ void ParseArgMax(size_t subgraphIndex, size_t operatorIndex);
++ void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex);
+ void ParseSum(size_t subgraphIndex, size_t operatorIndex);
+
+ void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot);
+diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
+index 0c1c1e2..9fb6737 100644
+--- a/src/backends/backendsCommon/LayerSupportBase.cpp
++++ b/src/backends/backendsCommon/LayerSupportBase.cpp
+@@ -623,6 +623,14 @@ bool LayerSupportBase::IsReverseV2Supported(const TensorInfo& /*input*/,
+ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+ }
+
++bool LayerSupportBase::IsReduceMaxSupported(const TensorInfo& /*input*/,
++ const TensorInfo& /*output*/,
++ const ReduceMaxDescriptor& /*descriptor*/,
++ Optional<std::string&> reasonIfUnsupported) const
++{
++ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
++}
++
+ bool LayerSupportBase::IsReduceSumSupported(const TensorInfo& /*input*/,
+ const TensorInfo& /*output*/,
+ const ReduceSumDescriptor& /*descriptor*/,
+diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
+index 6f1e6e0..1c6da1a 100644
+--- a/src/backends/backendsCommon/LayerSupportBase.hpp
++++ b/src/backends/backendsCommon/LayerSupportBase.hpp
+@@ -382,6 +382,11 @@ public:
+ const ReverseV2Descriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
++ bool IsReduceMaxSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceMaxDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
++
+ bool IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
+index dcbec11..afdfcad 100644
+--- a/src/backends/backendsCommon/WorkloadData.cpp
++++ b/src/backends/backendsCommon/WorkloadData.cpp
+@@ -3532,6 +3532,45 @@ void ReverseV2QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+ }
+ }
+
++void ReduceMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
++{
++ const std::string descriptorName{"ReduceMaxQueueDescriptor"};
++
++ ValidateNumInputs(workloadInfo, descriptorName, 1);
++ ValidateNumOutputs(workloadInfo, descriptorName, 1);
++
++ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0];
++ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
++
++ ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input");
++ ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output");
++
++ std::vector<DataType> supportedTypes =
++ {
++ DataType::BFloat16,
++ DataType::Float16,
++ DataType::Float32,
++ DataType::QAsymmS8,
++ DataType::QAsymmU8,
++ DataType::QSymmS16,
++ DataType::Signed32
++ };
++
++ ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName);
++ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
++
++    // ReduceMax must not change the batch dimension: input and output batch sizes must match.
++ const unsigned int inputBatchSize = inputTensorInfo.GetShape()[0];
++ const unsigned int outputBatchSize = outputTensorInfo.GetShape()[0];
++ if (inputBatchSize != outputBatchSize)
++ {
++ throw InvalidArgumentException(
++ boost::str(boost::format("%1%: Input batch size (%2%) "
++ "does not match output batch size (%3%)") %
++ descriptorName % inputBatchSize % outputBatchSize));
++ }
++}
++
+ void ReduceSumQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+ {
+ const std::string descriptorName{"ReduceSumQueueDescriptor"};
+diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
+index 0cbe8aa..1db7004 100644
+--- a/src/backends/backendsCommon/WorkloadData.hpp
++++ b/src/backends/backendsCommon/WorkloadData.hpp
+@@ -639,6 +639,11 @@ struct ReverseV2QueueDescriptor : QueueDescriptorWithParameters<ReverseV2Descrip
+ void Validate(const WorkloadInfo& workloadInfo) const;
+ };
+
++struct ReduceMaxQueueDescriptor : QueueDescriptorWithParameters<ReduceMaxDescriptor>
++{
++ void Validate(const WorkloadInfo& workloadInfo) const;
++};
++
+ struct ReduceSumQueueDescriptor : QueueDescriptorWithParameters<ReduceSumDescriptor>
+ {
+ void Validate(const WorkloadInfo& workloadInfo) const;
+diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
+index 31e6bfd..639e95c 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.cpp
++++ b/src/backends/backendsCommon/WorkloadFactory.cpp
+@@ -1190,6 +1190,18 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+ reason);
+ break;
+ }
++ case LayerType::ReduceMax:
++ {
++ auto cLayer = PolymorphicDowncast<const ReduceMaxLayer*>(&layer);
++ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
++ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
++
++ result = layerSupportObject->IsReduceMaxSupported(OverrideDataType(input, dataType),
++ OverrideDataType(output, dataType),
++ cLayer->GetParameters(),
++ reason);
++ break;
++ }
+ case LayerType::ReduceSum:
+ {
+ auto cLayer = PolymorphicDowncast<const ReduceSumLayer*>(&layer);
+@@ -1614,6 +1626,12 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateReverseV2(const ReverseV2Queu
+ return std::unique_ptr<IWorkload>();
+ }
+
++std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceMax(const ReduceMaxQueueDescriptor& /*descriptor*/,
++ const WorkloadInfo& /*info*/) const
++{
++ return std::unique_ptr<IWorkload>();
++}
++
+ std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& /*descriptor*/,
+ const WorkloadInfo& /*info*/) const
+ {
+diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
+index 740da18..b068b45 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.hpp
++++ b/src/backends/backendsCommon/WorkloadFactory.hpp
+@@ -254,6 +254,9 @@ public:
+ virtual std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
+
++ virtual std::unique_ptr<IWorkload> CreateReduceMax(const ReduceMaxQueueDescriptor& descriptor,
++ const WorkloadInfo& info) const;
++
+ virtual std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
+ };
+diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
+index 1ac947e..91bb954 100644
+--- a/src/backends/reference/RefLayerSupport.cpp
++++ b/src/backends/reference/RefLayerSupport.cpp
+@@ -2161,6 +2161,36 @@ bool RefLayerSupport::IsReverseV2Supported(const TensorInfo& input,
+ return supported;
+ }
+
++bool RefLayerSupport::IsReduceMaxSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceMaxDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported) const
++{
++ IgnoreUnused(descriptor);
++ bool supported = true;
++ std::array<DataType,7> supportedTypes =
++ {
++ DataType::BFloat16,
++ DataType::Float32,
++ DataType::Float16,
++ DataType::QAsymmS8,
++ DataType::QAsymmU8,
++ DataType::QSymmS16,
++ DataType::Signed32
++ };
++
++ supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
++ "Reference ReduceMax: input type not supported");
++
++ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
++ "Reference ReduceMax: output type not supported");
++
++ supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
++ "Reference ReduceMax: input and output types not matching");
++
++ return supported;
++}
++
+ bool RefLayerSupport::IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
+index cdc2adb..c095f7c 100644
+--- a/src/backends/reference/RefLayerSupport.hpp
++++ b/src/backends/reference/RefLayerSupport.hpp
+@@ -347,6 +347,11 @@ public:
+ const ReverseV2Descriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
++ bool IsReduceMaxSupported(const TensorInfo& input,
++ const TensorInfo& output,
++ const ReduceMaxDescriptor& descriptor,
++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
++
+ bool IsReduceSumSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceSumDescriptor& descriptor,
+diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
+index 7d1e810..c74516e 100644
+--- a/src/backends/reference/RefWorkloadFactory.cpp
++++ b/src/backends/reference/RefWorkloadFactory.cpp
+@@ -638,6 +638,12 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReverseV2(const ReverseV2Qu
+ return std::make_unique<RefReverseV2Workload>(descriptor, info);
+ }
+
++std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceMax(const ReduceMaxQueueDescriptor& descriptor,
++ const WorkloadInfo& info) const
++{
++ return std::make_unique<RefReduceMaxWorkload>(descriptor, info);
++}
++
+ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+ {
+diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
+index 14df6b8..289d996 100644
+--- a/src/backends/reference/RefWorkloadFactory.hpp
++++ b/src/backends/reference/RefWorkloadFactory.hpp
+@@ -253,6 +253,9 @@ public:
+ std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
++ std::unique_ptr<IWorkload> CreateReduceMax(const ReduceMaxQueueDescriptor& descriptor,
++ const WorkloadInfo& info) const override;
++
+ std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
+diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
+index f68a673..7eaa615 100644
+--- a/src/backends/reference/workloads/CMakeLists.txt
++++ b/src/backends/reference/workloads/CMakeLists.txt
+@@ -49,6 +49,8 @@ list(APPEND armnnRefBackendWorkloads_sources
+ Minimum.hpp
+ Pad.cpp
+ Pad.hpp
++ ReduceMax.cpp
++ ReduceMax.hpp
+ ReduceSum.cpp
+ ReduceSum.hpp
+ ReverseV2.cpp
+@@ -178,6 +180,8 @@ list(APPEND armnnRefBackendWorkloads_sources
+ TransposeConvolution2d.hpp
+ RefReverseV2Workload.cpp
+ RefReverseV2Workload.hpp
++ RefReduceMaxWorkload.cpp
++ RefReduceMaxWorkload.hpp
+ RefReduceSumWorkload.cpp
+ RefReduceSumWorkload.hpp
+ )
+diff --git a/src/backends/reference/workloads/ReduceMax.cpp b/src/backends/reference/workloads/ReduceMax.cpp
+new file mode 100644
+index 0000000..d956201
+--- /dev/null
++++ b/src/backends/reference/workloads/ReduceMax.cpp
+@@ -0,0 +1,71 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReduceMax.hpp"
++
++#include <armnnUtils/TensorUtils.hpp>
++
++#include <boost/numeric/conversion/cast.hpp>
++
++namespace armnn
++{
++
++void ReduceMax(Decoder<float>& in, float *out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, int32_t axis)
++{
++ IgnoreUnused(outputTensorInfo);
++
++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
++ const unsigned int batchSize = inputTensorInfo.GetShape()[0];
++ const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
++ const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
++ const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
++ uAxis + 1,
++ inputTensorInfo.GetNumDimensions());
++
++ if (batchSize > 0 && uAxis == 2 && inputTensorInfo.GetShape().GetNumDimensions() == 4) {
++ unsigned int height = inputTensorInfo.GetShape()[1];
++ unsigned int width = inputTensorInfo.GetShape()[2];
++ unsigned int channel = inputTensorInfo.GetShape()[3];
++
++ for (unsigned int b = 0; b < batchSize; ++b) {
++ for (unsigned int c = 0; c < channel; ++c) {
++ auto tmpValue = in.Get();
++ for (unsigned int y = 0; y < height; ++y) {
++ for (unsigned int x = 0; x < width; ++x) {
++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c];
++ const auto& value = in.Get();
++
++ if (value >= tmpValue)
++ tmpValue = value;
++ }
++ }
++
++ out[b * channel + c] = tmpValue;
++ tmpValue = 0.0f;
++ }
++ }
++
++ return;
++ }
++
++
++ for (unsigned int outer = 0; outer < outerElements; ++outer) {
++ for (unsigned int inner = 0; inner < innerElements; ++inner) {
++ in[outer * axisSize * innerElements + inner];
++ auto tmpValue = in.Get();
++ for (unsigned int i = 1; i < axisSize; ++i) {
++ in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
++ const auto& value = in.Get();
++ if (value >= tmpValue) {
++ tmpValue = value;
++ }
++ }
++ out[outer * innerElements + inner] = tmpValue;
++ }
++ }
++}
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/ReduceMax.hpp b/src/backends/reference/workloads/ReduceMax.hpp
+new file mode 100644
+index 0000000..3d9877a
+--- /dev/null
++++ b/src/backends/reference/workloads/ReduceMax.hpp
+@@ -0,0 +1,20 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include "armnn/Tensor.hpp"
++#include "armnn/Descriptors.hpp"
++
++#include "Decoders.hpp"
++
++namespace armnn
++{
++
++void ReduceMax(Decoder<float>& in, float* out, const TensorInfo& inputTensorInfo,
++ const TensorInfo& outputTensorInfo, int32_t axis);
++
++} //namespace armnn
++
+diff --git a/src/backends/reference/workloads/RefReduceMaxWorkload.cpp b/src/backends/reference/workloads/RefReduceMaxWorkload.cpp
+new file mode 100644
+index 0000000..82eb280
+--- /dev/null
++++ b/src/backends/reference/workloads/RefReduceMaxWorkload.cpp
+@@ -0,0 +1,35 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "RefReduceMaxWorkload.hpp"
++
++#include "ReduceMax.hpp"
++#include "RefWorkloadUtils.hpp"
++#include "BaseIterator.hpp"
++#include "Profiling.hpp"
++
++#include "BaseIterator.hpp"
++#include "Decoders.hpp"
++#include "Encoders.hpp"
++
++namespace armnn
++{
++
++void RefReduceMaxWorkload::Execute() const
++{
++ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceMaxWorkload_Execute");
++
++ const TensorInfo& inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]);
++ const TensorInfo& outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]);
++
++ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map());
++ Decoder<float> &decoder = *decoderPtr;
++
++ float *output = GetOutputTensorData<float>(0, m_Data);
++
++ ReduceMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Axis);
++}
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/RefReduceMaxWorkload.hpp b/src/backends/reference/workloads/RefReduceMaxWorkload.hpp
+new file mode 100644
+index 0000000..df9cb1e
+--- /dev/null
++++ b/src/backends/reference/workloads/RefReduceMaxWorkload.hpp
+@@ -0,0 +1,21 @@
++//
++// Copyright © 2017 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include <backendsCommon/Workload.hpp>
++#include <backendsCommon/WorkloadData.hpp>
++
++namespace armnn
++{
++
++class RefReduceMaxWorkload : public BaseWorkload<ReduceMaxQueueDescriptor>
++{
++public:
++ using BaseWorkload<ReduceMaxQueueDescriptor>::BaseWorkload;
++ virtual void Execute() const override;
++};
++
++} //namespace armnn
+diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
+index 9427d5c..140dca8 100644
+--- a/src/backends/reference/workloads/RefWorkloads.hpp
++++ b/src/backends/reference/workloads/RefWorkloads.hpp
+@@ -68,4 +68,5 @@
+ #include "Splitter.hpp"
+ #include "TensorBufferArrayView.hpp"
+ #include "RefReverseV2Workload.hpp"
++#include "RefReduceMaxWorkload.hpp"
+ #include "RefReduceSumWorkload.hpp"
+--
+2.7.4
+
diff --git a/packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch b/packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch
new file mode 100644
index 000000000..6bd599e3c
--- /dev/null
+++ b/packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch
@@ -0,0 +1,333 @@
+From bf35ac04008cf78641b510c21219bfd7163dfeb8 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Tue, 8 Sep 2020 11:56:48 +0900
+Subject: [PATCH 07/10] backends/test: Add ReduceMax op test cases
+
+Change-Id: Iebe168dc646981f8a9ab62efc2c6c14aed8d9f84
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ src/backends/backendsCommon/test/CMakeLists.txt | 2 +
+ src/backends/backendsCommon/test/LayerTests.hpp | 1 +
+ .../test/layerTests/ReduceMaxTestImpl.cpp | 230 +++++++++++++++++++++
+ .../test/layerTests/ReduceMaxTestImpl.hpp | 29 +++
+ src/backends/reference/test/RefLayerTests.cpp | 6 +
+ 5 files changed, 268 insertions(+)
+ create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp
+ create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp
+
+diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
+index e548b2b..9b8ba74 100644
+--- a/src/backends/backendsCommon/test/CMakeLists.txt
++++ b/src/backends/backendsCommon/test/CMakeLists.txt
+@@ -127,6 +127,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
+ layerTests/PreluTestImpl.hpp
+ layerTests/QuantizeTestImpl.cpp
+ layerTests/QuantizeTestImpl.hpp
++ layerTests/ReduceMaxTestImpl.cpp
++ layerTests/ReduceMaxTestImpl.hpp
+ layerTests/ReduceSumTestImpl.cpp
+ layerTests/ReduceSumTestImpl.hpp
+ layerTests/ReshapeTestImpl.cpp
+diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
+index b3cfe4a..b86e8d7 100644
+--- a/src/backends/backendsCommon/test/LayerTests.hpp
++++ b/src/backends/backendsCommon/test/LayerTests.hpp
+@@ -44,6 +44,7 @@
+ #include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
++#include <backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ReverseV2TestImpl.hpp>
+ #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
+diff --git a/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp
+new file mode 100644
+index 0000000..81cebb6
+--- /dev/null
++++ b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp
+@@ -0,0 +1,230 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#include "ReduceMaxTestImpl.hpp"
++
++#include <backendsCommon/test/DataTypeUtils.hpp>
++#include <backendsCommon/test/TensorCopyUtils.hpp>
++#include <backendsCommon/test/WorkloadTestUtils.hpp>
++
++#include <test/TensorHelpers.hpp>
++
++namespace
++{
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceMaxTestCommon(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
++ const armnn::TensorInfo inputTensorInfo,
++ const armnn::TensorInfo outputTensorInfo,
++ const std::vector<float>& inputData,
++ const std::vector<float>& outputData,
++ int axis = 3)
++{
++ IgnoreUnused(memoryManager);
++ auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
++
++ LayerTestResult<float, 4> result(outputTensorInfo);
++ result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
++
++ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
++ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
++
++ armnn::ReduceMaxQueueDescriptor descriptor;
++ unsigned int updated_idx = static_cast<uint32_t>(axis);
++ if (axis < 0) {
++ updated_idx = static_cast<uint32_t>(static_cast<int32_t>(inputTensorInfo.GetNumDimensions()) + axis);
++ }
++
++ descriptor.m_Parameters.m_Axis = updated_idx;
++ descriptor.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW;
++ armnn::WorkloadInfo info;
++
++ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
++ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
++
++ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduceMax(descriptor, info);
++
++ inputHandle->Allocate();
++ outputHandle->Allocate();
++
++ CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin());
++
++ workload->Execute();
++
++ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
++
++ return result;
++}
++
++} // namespace
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceMaxSimpleTest(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 1, 1, 5 };
++ const armnn::TensorShape outputShape{ 1, 1, 1, 1};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f });
++ std::vector<float> outputValues({ 10.0f });
++
++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, -1);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceMaxMultiChannel_1Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
++ const armnn::TensorShape outputShape{ 1, 1, 2, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++ std::vector<float> outputValues({ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++
++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 1);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceMaxMultiChannel_2Test(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
++ const armnn::TensorShape outputShape{ 1, 1, 1, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f,
++ 5.0f, 6.0f, 7.0f, 8.0f,
++
++ 10.0f, 20.0f, 30.0f, 40.0f,
++ 50.0f, 60.0f, 70.0f, 80.0f,
++
++ 100.0f, 200.0f, 300.0f, 400.0f,
++ 500.0f, 600.0f, 700.0f, 800.0f });
++ std::vector<float> outputValues({ 500.0f, 600.0f, 700.0f, 800.0f });
++
++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 2);
++}
++
++template<armnn::DataType ArmnnType, typename T>
++LayerTestResult<float, 4> ReduceMaxMultiBatchAndChannelTest(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
++{
++ const armnn::TensorShape inputShape{ 3, 2, 3, 4 };
++ const armnn::TensorShape outputShape{ 3, 1, 1, 4};
++
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
++
++ if (armnn::IsQuantizedType<T>())
++ {
++ inputTensorInfo.SetQuantizationScale(1.0f);
++ inputTensorInfo.SetQuantizationOffset(0);
++ }
++
++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
++
++ std::vector<float> inputValues( {7, 8, 6, 1,
++ 1, 1, 8, 7,
++ 3, 7, 7, 7,
++
++ 6, 8, 4, 7,
++ 3, 8, 7, 3,
++ 5, 8, 8, 8,
++
++
++ 7, 8, 2, 7,
++ 3, 8, 5, 6,
++ 8, 4, 2, 7,
++
++ 1, 6, 7, 2,
++ 8, 3, 3, 1,
++ 7, 6, 2, 6,
++
++
++ 5, 3, 4, 8,
++ 7, 8, 2, 4,
++ 6, 6, 2, 8,
++
++ 2, 2, 7, 2,
++ 5, 3, 6, 3,
++ 6, 1, 8, 8});
++ std::vector<float> outputValues({ 7.0f, 8.0f, 8.0f, 8.0f,
++ 8.0f, 8.0f, 7.0f, 7.0f,
++ 7.0f, 8.0f, 8.0f, 8.0f});
++
++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager,
++ inputTensorInfo, outputTensorInfo,
++ inputValues, outputValues, 2);
++}
++
++// Explicit template specializations
++
++template LayerTestResult<float, 4>
++ReduceMaxSimpleTest<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceMaxMultiChannel_1Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceMaxMultiChannel_2Test<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template LayerTestResult<float, 4>
++ReduceMaxMultiBatchAndChannelTest<armnn::DataType::Float32>(
++ armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+diff --git a/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp
+new file mode 100644
+index 0000000..f6691aa
+--- /dev/null
++++ b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp
+@@ -0,0 +1,29 @@
++//
++// Copyright © 2019 Arm Ltd. All rights reserved.
++// SPDX-License-Identifier: MIT
++//
++
++#pragma once
++
++#include "LayerTestResult.hpp"
++
++#include <ResolveType.hpp>
++
++#include <armnn/backends/IBackendInternal.hpp>
++#include <backendsCommon/WorkloadFactory.hpp>
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceMaxSimpleTest(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceMaxMultiChannel_1Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceMaxMultiChannel_2Test(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
++
++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
++LayerTestResult<float, 4> ReduceMaxMultiBatchAndChannelTest(armnn::IWorkloadFactory& workloadFactory,
++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
+index a5b1f9d..3ce4fe2 100644
+--- a/src/backends/reference/test/RefLayerTests.cpp
++++ b/src/backends/reference/test/RefLayerTests.cpp
+@@ -1980,6 +1980,12 @@ ARMNN_AUTO_TEST_CASE(Neg3dQuantisedAsymm8, Neg3dTest<DataType::QAsymmU8>)
+ ARMNN_AUTO_TEST_CASE(Neg2dQuantisedSymm16, Neg2dTest<DataType::QSymmS16>)
+ ARMNN_AUTO_TEST_CASE(Neg3dQuantisedSymm16, Neg3dTest<DataType::QSymmS16>)
+
++// ReduceMax
++ARMNN_AUTO_TEST_CASE(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceMaxMultiChannelFloat32, ReduceMaxMultiChannel_1Test<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceMaxMultiChannel2Float32, ReduceMaxMultiChannel_2Test<DataType::Float32>)
++ARMNN_AUTO_TEST_CASE(ReduceMaxMultiBatchAndChannelFloat32, ReduceMaxMultiBatchAndChannelTest<DataType::Float32>)
++
+ // ReduceSum
+ ARMNN_AUTO_TEST_CASE(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
+ ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannelFloat32, ReduceSumMultiChannel_1Test<DataType::Float32>)
+--
+2.7.4
+
diff --git a/packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch b/packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch
new file mode 100644
index 000000000..5aefaca18
--- /dev/null
+++ b/packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch
@@ -0,0 +1,98 @@
+From a5302d101304c0d62ce87bb0cae98190badece93 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Tue, 8 Sep 2020 12:25:40 +0900
+Subject: [PATCH 08/10] armnnTfLiteParser: Add Division op support
+
+Change-Id: Ib4bd0238f2cf19103e17a9d4fe03a30ab2615aa8
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ include/armnn/DescriptorsFwd.hpp | 1 +
+ src/armnnTfLiteParser/TfLiteParser.cpp | 40 ++++++++++++++++++++++++++++++++++
+ src/armnnTfLiteParser/TfLiteParser.hpp | 1 +
+ 3 files changed, 42 insertions(+)
+
+diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
+index 3b736f9..3b0f01f 100644
+--- a/include/armnn/DescriptorsFwd.hpp
++++ b/include/armnn/DescriptorsFwd.hpp
+@@ -36,6 +36,7 @@ struct ResizeDescriptor;
+ struct ReverseV2Descriptor;
+ struct ReduceMaxDescriptor;
+ struct ReduceSumDescriptor;
++struct DivisionDescriptor;
+ struct SoftmaxDescriptor;
+ struct SpaceToBatchNdDescriptor;
+ struct SpaceToDepthDescriptor;
+diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
+index 05a15e5..da30ac8 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.cpp
++++ b/src/armnnTfLiteParser/TfLiteParser.cpp
+@@ -535,6 +535,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o
+ m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax;
+ m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MAX] = &TfLiteParser::ParseReduceMax;
+ m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum;
++ m_ParserFunctions[tflite::BuiltinOperator_DIV] = &TfLiteParser::ParseDiv;
+
+ // register supported custom operators
+ m_CustomParserFunctions["TFLite_Detection_PostProcess"] = &TfLiteParser::ParseDetectionPostProcess;
+@@ -2899,6 +2900,45 @@ void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex)
+ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes);
+ }
+
++void TfLiteParser::ParseDiv(size_t subgraphIndex, size_t operatorIndex)
++{
++ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
++ const auto *options = operatorPtr->builtin_options.AsDivOptions();
++
++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
++
++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(inputs.size(), 2);
++
++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
++ CHECK_VALID_SIZE(outputs.size(), 1);
++
++ armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]);
++ armnn::TensorInfo input1TensorInfo = ToTensorInfo(inputs[1]);
++
++ auto layerName = boost::str(boost::format("Div:%1%:%2%") % subgraphIndex % operatorIndex);
++
++ IConnectableLayer* layer = m_Network->AddDivisionLayer(layerName.c_str());
++
++ TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
++
++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ if (inputTensorInfo.GetNumDimensions() != input1TensorInfo.GetNumDimensions())
++ {
++ AddBroadcastReshapeLayer(subgraphIndex, operatorIndex, layer);
++ }
++ else
++ {
++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0], inputTensorIndexes[1]});
++ }
++
++ layer = AddFusedActivationLayer(layer, 0, options->fused_activation_function);
++
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
++}
++
+ armnn::IConnectableLayer* TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer,
+ unsigned int outputSlot,
+ tflite::ActivationFunctionType activationType)
+diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
+index da635ae..691716b 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.hpp
++++ b/src/armnnTfLiteParser/TfLiteParser.hpp
+@@ -137,6 +137,7 @@ private:
+ void ParseArgMax(size_t subgraphIndex, size_t operatorIndex);
+ void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex);
+ void ParseSum(size_t subgraphIndex, size_t operatorIndex);
++ void ParseDiv(size_t subgraphIndex, size_t operatorIndex);
+
+ void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot);
+ void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot);
+--
+2.7.4
+
diff --git a/packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch b/packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch
new file mode 100644
index 000000000..1d0bebea0
--- /dev/null
+++ b/packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch
@@ -0,0 +1,40 @@
+From 7ca13b0bc9daf0cbe379727d4662c2f7fbc25164 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Wed, 9 Sep 2020 14:58:51 +0900
+Subject: [PATCH 09/10] tfLiteParser: Fix axis value for Pack op
+
+Change-Id: I53bcdf193cfac4f8ca9d157943a9d1687164f862
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ src/armnnTfLiteParser/TfLiteParser.cpp | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
+index da30ac8..f51ffc7 100644
+--- a/src/armnnTfLiteParser/TfLiteParser.cpp
++++ b/src/armnnTfLiteParser/TfLiteParser.cpp
+@@ -2372,12 +2372,19 @@ void TfLiteParser::ParsePack(size_t subgraphIndex, size_t operatorIndex)
+ const auto& operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
+ const auto* options = operatorPtr->builtin_options.AsPackOptions();
+
++ armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]);
++
++ uint32_t updated_axis = 0;
++ if (options->axis != 0)
++ updated_axis = inputTensorInfo.GetNumDimensions();
++ else
++ updated_axis = inputTensorInfo.GetNumDimensions() - 1;
++
+ StackDescriptor desc;
+- desc.m_Axis = static_cast<uint32_t>(options->axis);
++ desc.m_Axis = updated_axis;
+ desc.m_NumInputs = static_cast<uint32_t>(inputs.size());
+
+ // Use the tensor shape of the first input as the "correct" input shape in the descriptor
+- armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]);
+ desc.m_InputShape = inputTensorInfo.GetShape();
+
+ auto layerName = boost::str(boost::format("Pack:%1%:%2%") % subgraphIndex % operatorIndex);
+--
+2.7.4
+
diff --git a/packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch b/packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch
new file mode 100644
index 000000000..49202b0db
--- /dev/null
+++ b/packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch
@@ -0,0 +1,39 @@
+From e389db5b0ba3a8a58a7426f53d32047a46448aa9 Mon Sep 17 00:00:00 2001
+From: Inki Dae <inki.dae@samsung.com>
+Date: Wed, 9 Sep 2020 15:17:35 +0900
+Subject: [PATCH 10/10] backends: Skip ArgMax op for GpuAcc
+
+The CL kernel for the ArgMax op fails to compile on GpuAcc.
+So skip this op on the GPU backend and fall back to the
+CpuRef implementation instead.
+
+This is a workaround and should be replaced with a generic fix.
+
+Change-Id: I2993be977b7227322d0a446da88506fa60f28e4c
+Signed-off-by: Inki Dae <inki.dae@samsung.com>
+---
+ src/backends/backendsCommon/WorkloadFactory.cpp | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
+index 639e95c..3c8cb35 100644
+--- a/src/backends/backendsCommon/WorkloadFactory.cpp
++++ b/src/backends/backendsCommon/WorkloadFactory.cpp
+@@ -62,6 +62,14 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+ return false;
+ }
+
++#if 1 // Workaround.
++ if ((backendId == armnn::Compute::GpuAcc) &&
++ (layer.GetNameStr().find("ArgMax") != std::string::npos)) {
++ std::cout << __func__ << " => Skipped " << layer.GetName() << std::endl;
++ return false;
++ }
++#endif
++
+ auto backendFactory = backendRegistry.GetFactory(backendId);
+ auto backendObject = backendFactory();
+ auto layerSupportObject = backendObject->GetLayerSupport();
+--
+2.7.4
+
diff --git a/packaging/armnn.spec b/packaging/armnn.spec
index 9ff541622..b112d4b86 100644
--- a/packaging/armnn.spec
+++ b/packaging/armnn.spec
@@ -8,6 +8,18 @@ Source0: %{name}-%{version}.tar.gz
Source1001: %{name}.manifest
ExclusiveArch: %arm aarch64
+# Patch set for hand gesture model support from AIC
+Source2001: 0001-backens-reference-Add-ArgMinMax-op-support.patch
+Source2002: 0002-backends-reference-Add-ReduceSum-op-support.patch
+Source2003: 0003-backends-test-Add-ReduceSum-test-cases.patch
+Source2004: 0004-backends-reference-Add-ReverseV2-op-support.patch
+Source2005: 0005-backends-test-Add-ReverseV2-op-test-cases.patch
+Source2006: 0006-backends-reference-Add-ReduceMax-op-support.patch
+Source2007: 0007-backends-test-Add-ReduceMax-op-test-cases.patch
+Source2008: 0008-armnnTfLiteParser-Add-Division-op-support.patch
+Source2009: 0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch
+Source2010: 0010-backends-Skip-ArgMax-op-for-GpuAcc.patch
+
%define TF_LITE_SUPPORT 1
%define TF_SUPPORT 0
%define CAFFE_SUPPORT 0
@@ -63,6 +75,28 @@ Summary: Sample application and benchmark binaries to test ARM Neural Network Li
%setup -q
cp %{SOURCE1001} .
+cp %{SOURCE2001} .
+cp %{SOURCE2002} .
+cp %{SOURCE2003} .
+cp %{SOURCE2004} .
+cp %{SOURCE2005} .
+cp %{SOURCE2006} .
+cp %{SOURCE2007} .
+cp %{SOURCE2008} .
+cp %{SOURCE2009} .
+cp %{SOURCE2010} .
+
+patch -p1 < %{SOURCE2001}
+patch -p1 < %{SOURCE2002}
+patch -p1 < %{SOURCE2003}
+patch -p1 < %{SOURCE2004}
+patch -p1 < %{SOURCE2005}
+patch -p1 < %{SOURCE2006}
+patch -p1 < %{SOURCE2007}
+patch -p1 < %{SOURCE2008}
+patch -p1 < %{SOURCE2009}
+patch -p1 < %{SOURCE2010}
+
%build
#compile proto files