author    Inki Dae <inki.dae@samsung.com>    2020-09-09 15:59:32 +0900
committer Inki Dae <inki.dae@samsung.com>    2020-09-09 16:24:56 +0900
commit    0f13bac7ee630e6c4693a3d0554c312951151ff9 (patch)
tree      92206702922a79d60ff30e9944a5c1bacbbbfafa
parent    a5ede2282c1fb9efccb4cd5f4328576586267809 (diff)
packaging: Add patch set for hand gesture model support
This patch adds a patch set for hand gesture model support.
With this patch set, the hand gesture model from AIC works well.
What this patch set does:
- Add ArgMax op support
- Add ReduceSum op support
- Add ReverseV2 op support
- Add ReduceMax op support
- Add Division op support
- Fix wrong axis value for Pack op
- Skip ArgMax op for GpuAcc
This patch set will be upstreamed to ARMNN mainline.
Change-Id: I23967e3f1948cd07b11798389a4d362fbf346a15
Signed-off-by: Inki Dae <inki.dae@samsung.com>
11 files changed, 4248 insertions, 0 deletions
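For context on the ArgMax support added by patch 0001 below, the sketch that follows is not part of the commit: it is a minimal, standalone C++ illustration of the axis-wise argmax that the new reference workload computes with an int64 output. The real implementation iterates through armnn::Decoder<float> over a TensorInfo rather than a raw std::vector, and the function and variable names used here (e.g. ArgMaxAlongAxis) are illustrative only.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Argmax along `axis` of a dense row-major tensor with the given shape,
// writing int64 indices. The outer/axis/inner loop decomposition mirrors
// the structure of the reference ArgMinMax workload added in patch 0001.
std::vector<int64_t> ArgMaxAlongAxis(const std::vector<float>& in,
                                     const std::vector<unsigned int>& shape,
                                     unsigned int axis)
{
    assert(axis < shape.size());

    unsigned int outer = 1, inner = 1;
    for (unsigned int d = 0; d < axis; ++d)                 { outer *= shape[d]; }
    for (unsigned int d = axis + 1; d < shape.size(); ++d)  { inner *= shape[d]; }
    const unsigned int axisSize = shape[axis];

    std::vector<int64_t> out(outer * inner);
    for (unsigned int o = 0; o < outer; ++o)
    {
        for (unsigned int i = 0; i < inner; ++i)
        {
            float   best    = in[o * axisSize * inner + i];
            int64_t bestIdx = 0;
            for (unsigned int a = 1; a < axisSize; ++a)
            {
                const float v = in[(o * axisSize * inner) + (a * inner) + i];
                if (v > best) { best = v; bestIdx = static_cast<int64_t>(a); }
            }
            out[o * inner + i] = bestIdx;
        }
    }
    return out;
}

int main()
{
    // Same 1x2x3 input as the patch's ArgMaxTest: reducing over axis 1
    // picks the larger of the two rows per column, giving { 1, 0, 1 }.
    std::vector<float> input = { 1.0f, 5.0f, 3.0f, 4.0f, 2.0f, 6.0f };
    std::vector<int64_t> result = ArgMaxAlongAxis(input, { 1, 2, 3 }, 1);
    assert((result == std::vector<int64_t>{ 1, 0, 1 }));
    return 0;
}
```

The int64 output type matters because, as the patch message notes, the TensorFlow Lite parser produces an int64 output tensor for ArgMax by default; the workload dispatches to an int32 variant only when the parsed output_type requests Signed32.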
diff --git a/packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch b/packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch new file mode 100644 index 000000000..45aafa3b4 --- /dev/null +++ b/packaging/0001-backens-reference-Add-ArgMinMax-op-support.patch @@ -0,0 +1,350 @@ +From c7fd8aa7013557d330e62baf24738c5853000170 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Fri, 4 Sep 2020 15:19:36 +0900 +Subject: [PATCH 01/10] backends/reference: Add ArgMinMax op support + +This patch adds ArgMinMax op support. + +Current ARMNN has ArgMinMax op but not used, and +it doesn't support int64 type for output tensor of this op. + +So this patch adds a new type, int64 tensor type, and +also adds ArgMinMax computation function for int64 type. + +In default, tensorflow lite parser parses int64 type for output tensor +of ArgMinMax op so this patch makes a proper function - ArgMinMax op +for int64 or int32 - according to parsed output_type value. + +With this patch, ARMNN supports both types - int64 and int32 - for +ArgMinMax op. + +Change-Id: Ife213835a5531b99f32dbf73a73909e108dde89c +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + include/armnn/Descriptors.hpp | 5 ++- + include/armnn/Types.hpp | 1 + + include/armnn/TypesUtils.hpp | 2 + + src/armnnTfLiteParser/TfLiteParser.cpp | 46 +++++++++++++++++++++- + src/armnnTfLiteParser/TfLiteParser.hpp | 1 + + src/backends/aclCommon/ArmComputeTensorUtils.cpp | 2 + + src/backends/backendsCommon/WorkloadData.cpp | 8 +--- + src/backends/reference/test/ArgMinMaxTests.cpp | 12 +++--- + src/backends/reference/workloads/ArgMinMax.cpp | 33 ++++++++++++++++ + src/backends/reference/workloads/ArgMinMax.hpp | 3 ++ + src/backends/reference/workloads/CMakeLists.txt | 2 - + .../reference/workloads/RefArgMinMaxWorkload.cpp | 13 ++++-- + 12 files changed, 108 insertions(+), 20 deletions(-) + +diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp +index 13765e8..364e447 100644 +--- a/include/armnn/Descriptors.hpp ++++ b/include/armnn/Descriptors.hpp +@@ -53,17 +53,20 @@ struct ArgMinMaxDescriptor + ArgMinMaxDescriptor() + : m_Function(ArgMinMaxFunction::Min) + , m_Axis(-1) ++ , m_Output_Type(armnn::DataType::Signed64) + {} + + bool operator ==(const ArgMinMaxDescriptor &rhs) const + { +- return m_Function == rhs.m_Function && m_Axis == rhs.m_Axis; ++ return m_Function == rhs.m_Function && m_Axis == rhs.m_Axis && m_Output_Type == rhs.m_Output_Type; + } + + /// Specify if the function is to find Min or Max. + ArgMinMaxFunction m_Function; + /// Axis to reduce across the input tensor. + int m_Axis; ++ // Tensor data type and this could be int32 or int64. Default type is int64. 
++ armnn::DataType m_Output_Type; + }; + + /// A ComparisonDescriptor for the ComparisonLayer +diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp +index e58cecf..390288f 100644 +--- a/include/armnn/Types.hpp ++++ b/include/armnn/Types.hpp +@@ -41,6 +41,7 @@ enum class DataType + QSymmS8 = 7, + QAsymmS8 = 8, + BFloat16 = 9, ++ Signed64 = 10, + + QuantisedAsymm8 ARMNN_DEPRECATED_ENUM_MSG("Use DataType::QAsymmU8 instead.") = QAsymmU8, + QuantisedSymm16 ARMNN_DEPRECATED_ENUM_MSG("Use DataType::QSymmS16 instead.") = QSymmS16 +diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp +index 5065152..decd04e 100644 +--- a/include/armnn/TypesUtils.hpp ++++ b/include/armnn/TypesUtils.hpp +@@ -120,6 +120,7 @@ constexpr unsigned int GetDataTypeSize(DataType dataType) + case DataType::Float16: return 2U; + case DataType::Float32: + case DataType::Signed32: return 4U; ++ case DataType::Signed64: return 8U; + case DataType::QAsymmU8: return 1U; + case DataType::QAsymmS8: return 1U; + case DataType::QSymmS8: return 1U; +@@ -171,6 +172,7 @@ constexpr const char* GetDataTypeName(DataType dataType) + { + case DataType::Float16: return "Float16"; + case DataType::Float32: return "Float32"; ++ case DataType::Signed64: return "Signed64"; + case DataType::QAsymmU8: return "QAsymmU8"; + case DataType::QAsymmS8: return "QAsymmS8"; + case DataType::QSymmS8: return "QSymmS8"; +diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp +index 21b1bce..6c1a64b 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.cpp ++++ b/src/armnnTfLiteParser/TfLiteParser.cpp +@@ -342,7 +342,9 @@ armnn::TensorInfo ToTensorInfo(TfLiteParser::TensorRawPtr tensorPtr, const std:: + case tflite::TensorType_INT32: + type = armnn::DataType::Signed32; + break; +- ++ case tflite::TensorType_INT64: ++ type = armnn::DataType::Signed64; ++ break; + default: + { + CheckLocation location = CHECK_LOCATION(); +@@ -529,6 +531,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o + m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE] = &TfLiteParser::ParseTranspose; + m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV] = &TfLiteParser::ParseTransposeConv; + m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack; ++ m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax; + + // register supported custom operators + m_CustomParserFunctions["TFLite_Detection_PostProcess"] = &TfLiteParser::ParseDetectionPostProcess; +@@ -2728,6 +2731,47 @@ void TfLiteParser::ParseSplitV(size_t subgraphIndex, size_t operatorIndex) + RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); + } + ++void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex) ++{ ++ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex]; ++ const auto *options = operatorPtr->builtin_options.AsArgMaxOptions(); ++ ++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(inputs.size(), 2); ++ ++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(outputs.size(), 1); ++ ++ auto layerName = boost::str(boost::format("ArgMax:%1%:%2%") % subgraphIndex % operatorIndex); ++ ++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]); ++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]); ++ ++ // Get const axis value from model and set it to descriptor. 
++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer); ++ ++ ArgMinMaxDescriptor desc; ++ desc.m_Axis = axisBufferPtr->data.data()[0]; ++ // If output_type is int32 then set Signed32 else Signed64. Default type is Signed64. ++ desc.m_Output_Type = options->output_type == 3 ? armnn::DataType::Signed32 : armnn::DataType::Signed64; ++ desc.m_Function = ArgMinMaxFunction::Max; ++ ++ // Register a ArgMax layer. ++ IConnectableLayer *layer = m_Network->AddArgMinMaxLayer(desc, layerName.c_str()); ++ ++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]); ++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); ++ ++ // Register input tensor to the layer. ++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]}); ++ ++ // Register output tensor to the layer. ++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); ++} ++ + armnn::IConnectableLayer* TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer, + unsigned int outputSlot, + tflite::ActivationFunctionType activationType) +diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp +index c72f7ad..478edad 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.hpp ++++ b/src/armnnTfLiteParser/TfLiteParser.hpp +@@ -133,6 +133,7 @@ private: + void ParseTranspose(size_t subgraphIndex, size_t operatorIndex); + void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex); + void ParseUnpack(size_t subgraphIndex, size_t operatorIndex); ++ void ParseArgMax(size_t subgraphIndex, size_t operatorIndex); + + void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot); + void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot); +diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp +index 00ebc9c..d8b8bab 100644 +--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp ++++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp +@@ -31,6 +31,8 @@ arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multi + return arm_compute::DataType::QASYMM8; + case armnn::DataType::QSymmS16: + return arm_compute::DataType::QSYMM16; ++ case armnn::DataType::Signed64: ++ return arm_compute::DataType::S64; + case armnn::DataType::QSymmS8: + { + return multiScales ? 
arm_compute::DataType::QSYMM8_PER_CHANNEL : arm_compute::DataType::QSYMM8; +diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp +index 8f751c4..69bebbc 100644 +--- a/src/backends/backendsCommon/WorkloadData.cpp ++++ b/src/backends/backendsCommon/WorkloadData.cpp +@@ -623,11 +623,6 @@ void ArgMinMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const + const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; + +- if (outputTensorInfo.GetDataType() != DataType::Signed32) +- { +- throw InvalidArgumentException(descriptorName + ": Output of ArgMinMax layer must be Int32."); +- } +- + std::vector<DataType> supportedInputTypes = + { + DataType::BFloat16, +@@ -636,7 +631,8 @@ void ArgMinMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const + DataType::QAsymmS8, + DataType::QAsymmU8, + DataType::QSymmS16, +- DataType::Signed32 ++ DataType::Signed32, ++ DataType::Signed64 + }; + + ValidateDataTypes(inputTensorInfo, supportedInputTypes, descriptorName); +diff --git a/src/backends/reference/test/ArgMinMaxTests.cpp b/src/backends/reference/test/ArgMinMaxTests.cpp +index 201a2c0..dce15b2 100644 +--- a/src/backends/reference/test/ArgMinMaxTests.cpp ++++ b/src/backends/reference/test/ArgMinMaxTests.cpp +@@ -12,11 +12,11 @@ BOOST_AUTO_TEST_SUITE(RefArgMinMax) + BOOST_AUTO_TEST_CASE(ArgMinTest) + { + const armnn::TensorInfo inputInfo({ 1, 2, 3 } , armnn::DataType::Float32); +- const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Float32); ++ const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Signed64); + + std::vector<float> inputValues({ 1.0f, 5.0f, 3.0f, 4.0f, 2.0f, 6.0f}); +- std::vector<int32_t> outputValues(outputInfo.GetNumElements()); +- std::vector<int32_t> expectedValues({ 0, 1, 0 }); ++ std::vector<int64_t> outputValues(outputInfo.GetNumElements()); ++ std::vector<int64_t> expectedValues({ 0, 1, 0 }); + + ArgMinMax(*armnn::MakeDecoder<float>(inputInfo, inputValues.data()), + outputValues.data(), +@@ -35,11 +35,11 @@ BOOST_AUTO_TEST_CASE(ArgMinTest) + BOOST_AUTO_TEST_CASE(ArgMaxTest) + { + const armnn::TensorInfo inputInfo({ 1, 2, 3 } , armnn::DataType::Float32); +- const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Float32); ++ const armnn::TensorInfo outputInfo({ 1, 3 }, armnn::DataType::Signed64); + + std::vector<float> inputValues({ 1.0f, 5.0f, 3.0f, 4.0f, 2.0f, 6.0f }); +- std::vector<int32_t> outputValues(outputInfo.GetNumElements()); +- std::vector<int32_t> expectedValues({ 1, 0, 1 }); ++ std::vector<int64_t> outputValues(outputInfo.GetNumElements()); ++ std::vector<int64_t> expectedValues({ 1, 0, 1 }); + + ArgMinMax(*armnn::MakeDecoder<float>(inputInfo, inputValues.data()), + outputValues.data(), +diff --git a/src/backends/reference/workloads/ArgMinMax.cpp b/src/backends/reference/workloads/ArgMinMax.cpp +index 637aa17..308a869 100644 +--- a/src/backends/reference/workloads/ArgMinMax.cpp ++++ b/src/backends/reference/workloads/ArgMinMax.cpp +@@ -12,6 +12,39 @@ + namespace armnn + { + ++void ArgMinMax(Decoder<float>& in, int64_t* out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis) ++{ ++ IgnoreUnused(outputTensorInfo); ++ ++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis); ++ ++ const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis); 
++ const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis]; ++ const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), ++ uAxis + 1, ++ inputTensorInfo.GetNumDimensions()); ++ ++ for (unsigned int outer = 0; outer < outerElements; ++outer) { ++ for (unsigned int inner = 0; inner < innerElements; ++inner) { ++ in[outer * axisSize * innerElements + inner]; ++ auto tmpValue = in.Get(); ++ unsigned int tmpIndex = 0; ++ for (unsigned int i = 1; i < axisSize; ++i) { ++ in[(outer * axisSize * innerElements) + (i * innerElements) + inner]; ++ const auto& value = in.Get(); ++ if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) || ++ (function == armnn::ArgMinMaxFunction::Max && value > tmpValue)) { ++ tmpValue = value; ++ tmpIndex = i; ++ } ++ } ++ ++ out[outer * innerElements + inner] = boost::numeric_cast<int64_t>(tmpIndex); ++ } ++ } ++} ++ + void ArgMinMax(Decoder<float>& in, int32_t* out, const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis) + { +diff --git a/src/backends/reference/workloads/ArgMinMax.hpp b/src/backends/reference/workloads/ArgMinMax.hpp +index 5a9c6a8..b4693ee 100644 +--- a/src/backends/reference/workloads/ArgMinMax.hpp ++++ b/src/backends/reference/workloads/ArgMinMax.hpp +@@ -13,6 +13,9 @@ + namespace armnn + { + ++void ArgMinMax(Decoder<float>& in, int64_t* out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis); ++ + void ArgMinMax(Decoder<float>& in, int32_t* out, const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, ArgMinMaxFunction function, int axis); + +diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt +index 94c8513..a8ddd1d 100644 +--- a/src/backends/reference/workloads/CMakeLists.txt ++++ b/src/backends/reference/workloads/CMakeLists.txt +@@ -5,8 +5,6 @@ + + list(APPEND armnnRefBackendWorkloads_sources + Abs.hpp +- ArgMinMax.cpp +- ArgMinMax.hpp + Activation.cpp + Activation.hpp + ArgMinMax.cpp +diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp +index 5f1eb73..b7246d5 100644 +--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp ++++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp +@@ -29,10 +29,15 @@ void RefArgMinMaxWorkload::Execute() const + + const TensorInfo &outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); + +- int32_t* output = GetOutputTensorData<int32_t>(0, m_Data); +- +- ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function, +- m_Data.m_Parameters.m_Axis); ++ if (m_Data.m_Parameters.m_Output_Type == armnn::DataType::Signed32) { ++ int32_t *output = GetOutputTensorData<int32_t>(0, m_Data); ++ ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function, ++ m_Data.m_Parameters.m_Axis); ++ } else { ++ int64_t *output = GetOutputTensorData<int64_t>(0, m_Data); ++ ArgMinMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Function, ++ m_Data.m_Parameters.m_Axis); ++ } + } + + } //namespace armnn +\ No newline at end of file +-- +2.7.4 + diff --git a/packaging/0002-backends-reference-Add-ReduceSum-op-support.patch b/packaging/0002-backends-reference-Add-ReduceSum-op-support.patch new file mode 100644 index 000000000..e567e5ace --- /dev/null +++ 
b/packaging/0002-backends-reference-Add-ReduceSum-op-support.patch @@ -0,0 +1,869 @@ +From 0f6fba191b60093d9059e3ca7489cb24a508d8bc Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Mon, 7 Sep 2020 19:34:36 +0900 +Subject: [PATCH 02/10] backends/reference: Add ReduceSum op support + +Change-Id: I322010989be9dc94e48ef5e0e184b977f4cd2427 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + CMakeLists.txt | 2 + + include/armnn/Descriptors.hpp | 32 ++++++++++ + include/armnn/DescriptorsFwd.hpp | 1 + + include/armnn/ILayerSupport.hpp | 5 ++ + include/armnn/ILayerVisitor.hpp | 8 +++ + include/armnn/INetwork.hpp | 7 +++ + include/armnn/LayerVisitorBase.hpp | 4 ++ + src/armnn/InternalTypes.hpp | 3 +- + src/armnn/LayersFwd.hpp | 2 + + src/armnn/Network.cpp | 6 ++ + src/armnn/Network.hpp | 4 ++ + src/armnn/layers/ReduceSumLayer.cpp | 70 ++++++++++++++++++++++ + src/armnn/layers/ReduceSumLayer.hpp | 48 +++++++++++++++ + src/armnnTfLiteParser/TfLiteParser.cpp | 45 ++++++++++++++ + src/armnnTfLiteParser/TfLiteParser.hpp | 1 + + src/backends/backendsCommon/LayerSupportBase.cpp | 8 +++ + src/backends/backendsCommon/LayerSupportBase.hpp | 4 ++ + src/backends/backendsCommon/WorkloadData.cpp | 27 +++++++++ + src/backends/backendsCommon/WorkloadData.hpp | 5 ++ + src/backends/backendsCommon/WorkloadFactory.cpp | 18 ++++++ + src/backends/backendsCommon/WorkloadFactory.hpp | 2 + + src/backends/reference/RefLayerSupport.cpp | 30 ++++++++++ + src/backends/reference/RefLayerSupport.hpp | 4 ++ + src/backends/reference/RefWorkloadFactory.cpp | 6 ++ + src/backends/reference/RefWorkloadFactory.hpp | 4 ++ + src/backends/reference/workloads/CMakeLists.txt | 4 ++ + src/backends/reference/workloads/ReduceSum.cpp | 70 ++++++++++++++++++++++ + src/backends/reference/workloads/ReduceSum.hpp | 20 +++++++ + .../reference/workloads/RefReduceSumWorkload.cpp | 35 +++++++++++ + .../reference/workloads/RefReduceSumWorkload.hpp | 21 +++++++ + src/backends/reference/workloads/RefWorkloads.hpp | 1 + + 31 files changed, 496 insertions(+), 1 deletion(-) + create mode 100644 src/armnn/layers/ReduceSumLayer.cpp + create mode 100644 src/armnn/layers/ReduceSumLayer.hpp + create mode 100644 src/backends/reference/workloads/ReduceSum.cpp + create mode 100644 src/backends/reference/workloads/ReduceSum.hpp + create mode 100644 src/backends/reference/workloads/RefReduceSumWorkload.cpp + create mode 100644 src/backends/reference/workloads/RefReduceSumWorkload.hpp + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 5c125e9..962dc2d 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -380,6 +380,8 @@ list(APPEND armnn_sources + src/armnn/layers/TransposeConvolution2dLayer.hpp + src/armnn/layers/TransposeLayer.hpp + src/armnn/layers/TransposeLayer.cpp ++ src/armnn/layers/ReduceSumLayer.hpp ++ src/armnn/layers/ReduceSumLayer.cpp + src/armnn/BackendRegistry.cpp + src/armnn/BackendSettings.hpp + src/armnn/BackendHelper.cpp +diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp +index 364e447..3651c20 100644 +--- a/include/armnn/Descriptors.hpp ++++ b/include/armnn/Descriptors.hpp +@@ -1215,4 +1215,36 @@ struct TransposeDescriptor + PermutationVector m_DimMappings; + }; + ++/// A ReduceSumDescriptor for the REDUCE SUM. 
++struct ReduceSumDescriptor ++{ ++ ReduceSumDescriptor() ++ : m_TargetWidth(0) ++ , m_TargetHeight(0) ++ , m_KeepDims(0) ++ , m_Axis(0) ++ , m_DataLayout(DataLayout::NCHW) ++ {} ++ ++ bool operator ==(const ReduceSumDescriptor& rhs) const ++ { ++ return m_TargetWidth == rhs.m_TargetWidth && ++ m_TargetHeight == rhs.m_TargetHeight && ++ m_KeepDims == rhs.m_KeepDims && ++ m_Axis == rhs.m_Axis && ++ m_DataLayout == rhs.m_DataLayout; ++ } ++ ++ /// Target width value. ++ uint32_t m_TargetWidth; ++ /// Target height value. ++ uint32_t m_TargetHeight; ++ /// if true then output shape has no change. ++ uint32_t m_KeepDims; ++ /// The indices of the dimensions to reduce. ++ int32_t m_Axis; ++ /// The data layout to be used (NCHW, NHWC). ++ DataLayout m_DataLayout; ++}; ++ + } // namespace armnn +diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp +index f090372..cef85d5 100644 +--- a/include/armnn/DescriptorsFwd.hpp ++++ b/include/armnn/DescriptorsFwd.hpp +@@ -33,6 +33,7 @@ struct QLstmDescriptor; + struct ReshapeDescriptor; + struct ResizeBilinearDescriptor; + struct ResizeDescriptor; ++struct ReduceSumDescriptor; + struct SoftmaxDescriptor; + struct SpaceToBatchNdDescriptor; + struct SpaceToDepthDescriptor; +diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp +index 58509c9..0701790 100644 +--- a/include/armnn/ILayerSupport.hpp ++++ b/include/armnn/ILayerSupport.hpp +@@ -392,6 +392,11 @@ public: + const TransposeDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0; + ++ virtual bool IsReduceSumSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceSumDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0; ++ + }; // class ILayerSupport + + using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>; +diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp +index 530e74f..cd57275 100644 +--- a/include/armnn/ILayerVisitor.hpp ++++ b/include/armnn/ILayerVisitor.hpp +@@ -403,6 +403,14 @@ public: + const ResizeDescriptor& resizeDescriptor, + const char* name = nullptr) = 0; + ++ /// Function that a reduce_sum layer should call back to when its Accept(ILayerVisitor&) function is invoked. ++ /// @param layer - pointer to the layer which is calling back to this visit function. ++ /// @param ReduceSumDescriptor - Parameters for the reduce max operation. ++ /// @param name - Optional name for the layer. ++ virtual void VisitReduceSumLayer(const IConnectableLayer* layer, ++ const ReduceSumDescriptor& reducesumDescriptor, ++ const char* name = nullptr) = 0; ++ + /// Function a Reciprocal of square root layer should call back to when its Accept(ILayerVisitor&) + /// function is invoked. + /// @param layer - pointer to the layer which is calling back to this visit function. +diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp +index b840dd5..79ad686 100644 +--- a/include/armnn/INetwork.hpp ++++ b/include/armnn/INetwork.hpp +@@ -360,6 +360,13 @@ public: + virtual IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor, + const char* name = nullptr) = 0; + ++ /// Adds a reducemax layer to the network. ++ /// @param ReduceSumDescriptor - Parameters for the reducemax operation. ++ /// @param name - Optional name for the layer. ++ /// @return - Interface for configuring the layer. 
++ virtual IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducemaxDescriptor, ++ const char* name = nullptr) = 0; ++ + /// Adds an instance normalization layer to the network. + /// @param desc - Parameters for the instance normalization operation. + /// @param name - Optional name for the layer. +diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp +index 95d6bd3..209ef2c 100644 +--- a/include/armnn/LayerVisitorBase.hpp ++++ b/include/armnn/LayerVisitorBase.hpp +@@ -204,6 +204,10 @@ public: + const ResizeDescriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } + ++ void VisitReduceSumLayer(const IConnectableLayer*, ++ const ReduceSumDescriptor&, ++ const char*) override { DefaultPolicy::Apply(__func__); } ++ + void VisitRsqrtLayer(const IConnectableLayer*, + const char*) override { DefaultPolicy::Apply(__func__); } + +diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp +index 455cb60..5f5ee01 100644 +--- a/src/armnn/InternalTypes.hpp ++++ b/src/armnn/InternalTypes.hpp +@@ -71,7 +71,8 @@ + X(Subtraction) \ + X(Switch) \ + X(Transpose) \ +- X(TransposeConvolution2d) ++ X(TransposeConvolution2d) \ ++ X(ReduceSum) + + /// When adding a new layer, adapt also the LastLayer enum value in the + /// enum class LayerType below +diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp +index 2054413..5092828 100644 +--- a/src/armnn/LayersFwd.hpp ++++ b/src/armnn/LayersFwd.hpp +@@ -54,6 +54,7 @@ + #include "layers/QuantizedLstmLayer.hpp" + #include "layers/ReshapeLayer.hpp" + #include "layers/ResizeLayer.hpp" ++#include "layers/ReduceSumLayer.hpp" + #include "layers/SliceLayer.hpp" + #include "layers/SoftmaxLayer.hpp" + #include "layers/SpaceToBatchNdLayer.hpp" +@@ -142,6 +143,7 @@ DECLARE_LAYER(QLstm) + DECLARE_LAYER(QuantizedLstm) + DECLARE_LAYER(Reshape) + DECLARE_LAYER(Resize) ++DECLARE_LAYER(ReduceSum) + DECLARE_LAYER(Slice) + DECLARE_LAYER(Softmax) + DECLARE_LAYER(SpaceToBatchNd) +diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp +index a047297..335e104 100644 +--- a/src/armnn/Network.cpp ++++ b/src/armnn/Network.cpp +@@ -1472,6 +1472,12 @@ resizeDescriptor, const char* name) + return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name); + } + ++IConnectableLayer* Network::AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor, ++ const char* name) ++{ ++ return m_Graph->AddLayer<ReduceSumLayer>(reducesumDescriptor, name); ++} ++ + IConnectableLayer* Network::AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc, + const char* name) + { +diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp +index df4a35f..6c767f3 100644 +--- a/src/armnn/Network.hpp ++++ b/src/armnn/Network.hpp +@@ -160,6 +160,10 @@ public: + IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor, + const char* name = nullptr) override; + ++ IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor, ++ const char* name = nullptr) override; ++ ++ + IConnectableLayer* AddInstanceNormalizationLayer(const InstanceNormalizationDescriptor& desc, + const char* name = nullptr) override; + +diff --git a/src/armnn/layers/ReduceSumLayer.cpp b/src/armnn/layers/ReduceSumLayer.cpp +new file mode 100644 +index 0000000..198289c +--- /dev/null ++++ b/src/armnn/layers/ReduceSumLayer.cpp +@@ -0,0 +1,70 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReduceSumLayer.hpp" ++#include "LayerCloneBase.hpp" ++ ++#include <armnn/TypesUtils.hpp> ++ ++#include <armnnUtils/DataLayoutIndexed.hpp> ++ ++#include <backendsCommon/WorkloadData.hpp> ++#include <backendsCommon/WorkloadFactory.hpp> ++ ++using namespace armnnUtils; ++ ++namespace armnn ++{ ++ ++ReduceSumLayer::ReduceSumLayer(const ReduceSumDescriptor& param, const char* name) ++ : LayerWithParameters(1, 1, LayerType::ReduceSum, param, name) ++{ ++} ++ ++std::unique_ptr<IWorkload> ReduceSumLayer::CreateWorkload(const IWorkloadFactory& factory) const ++{ ++ ReduceSumQueueDescriptor descriptor; ++ return factory.CreateReduceSum(descriptor, PrepInfoAndDesc(descriptor)); ++} ++ ++ReduceSumLayer* ReduceSumLayer::Clone(Graph& graph) const ++{ ++ return CloneBase<ReduceSumLayer>(graph, m_Param, GetName()); ++} ++ ++std::vector<TensorShape> ReduceSumLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const ++{ ++ ARMNN_ASSERT(inputShapes.size() == 1); ++ ++ const TensorShape& inputShape = inputShapes[0]; ++ const DataLayoutIndexed dimensionIndices = m_Param.m_DataLayout; ++ ++ unsigned int outWidth = m_Param.m_TargetWidth; ++ unsigned int outHeight = m_Param.m_TargetHeight; ++ unsigned int outChannels = inputShape[dimensionIndices.GetChannelsIndex()]; ++ unsigned int outBatch = inputShape[0]; ++ ++ TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ? ++ TensorShape( { outBatch, outHeight, outWidth, outChannels } ) : ++ TensorShape( { outBatch, outChannels, outHeight, outWidth }); ++ ++ return std::vector<TensorShape>({ tensorShape }); ++} ++ ++void ReduceSumLayer::ValidateTensorShapesFromInputs() ++{ ++ VerifyLayerConnections(1, CHECK_LOCATION()); ++ ++ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() }); ++ ++ ARMNN_ASSERT(inferredShapes.size() == 1); ++} ++ ++void ReduceSumLayer::Accept(ILayerVisitor& visitor) const ++{ ++ visitor.VisitReduceSumLayer(this, GetParameters(), GetName()); ++} ++ ++} // namespace armnn +diff --git a/src/armnn/layers/ReduceSumLayer.hpp b/src/armnn/layers/ReduceSumLayer.hpp +new file mode 100644 +index 0000000..e8fe998 +--- /dev/null ++++ b/src/armnn/layers/ReduceSumLayer.hpp +@@ -0,0 +1,48 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++#pragma once ++ ++#include "LayerWithParameters.hpp" ++ ++namespace armnn ++{ ++ ++/// This layer represents a reducemax operation. ++class ReduceSumLayer : public LayerWithParameters<ReduceSumDescriptor> ++{ ++public: ++ /// Makes a workload for the ReduceSum type. ++ /// @param [in] graph The graph where this layer can be found. ++ /// @param [in] factory The workload factory which will create the workload. ++ /// @return A pointer to the created workload, or nullptr if not created. ++ virtual std::unique_ptr<IWorkload>CreateWorkload(const IWorkloadFactory& factory) const override; ++ ++ /// Creates a dynamically-allocated copy of this layer. ++ /// @param [in] graph The graph into which this layer is being cloned. ++ ReduceSumLayer* Clone(Graph& graph) const override; ++ ++ /// Check if the input tensor shape(s) ++ /// will lead to a valid configuration of @ref ReduceSumLayer. ++ void ValidateTensorShapesFromInputs() override; ++ ++ /// By default returns inputShapes if the number of inputs are equal to number of outputs, ++ /// otherwise infers the output shapes from given input shapes and layer properties. 
++ /// @param [in] inputShapes The input shapes layer has. ++ /// @return A vector to the inferred output shape. ++ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override; ++ ++ void Accept(ILayerVisitor& visitor) const override; ++ ++protected: ++ /// Constructor to create a ReduceSumLayer. ++ /// @param [in] param ReduceSumDescriptor to configure the resize operation. ++ /// @param [in] name Optional name for the layer. ++ ReduceSumLayer(const ReduceSumDescriptor& param, const char* name); ++ ++ /// Default destructor ++ ~ReduceSumLayer() = default; ++}; ++ ++} // namespace armnn +diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp +index 6c1a64b..e5400dc 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.cpp ++++ b/src/armnnTfLiteParser/TfLiteParser.cpp +@@ -532,6 +532,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o + m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV] = &TfLiteParser::ParseTransposeConv; + m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack; + m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax; ++ m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum; + + // register supported custom operators + m_CustomParserFunctions["TFLite_Detection_PostProcess"] = &TfLiteParser::ParseDetectionPostProcess; +@@ -2772,6 +2773,50 @@ void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex) + RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); + } + ++void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex) ++{ ++ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex]; ++ const auto *options = operatorPtr->builtin_options.AsReducerOptions(); ++ ++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(inputs.size(), 2); ++ ++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(outputs.size(), 1); ++ ++ auto layerName = boost::str(boost::format("ReduceSum:%1%:%2%") % subgraphIndex % operatorIndex); ++ ++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]); ++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]); ++ ++ TensorShape shape = sizeTensorInfo0.GetShape(); ++ ++ // Get const axis value from model and set it to descriptor. ++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer); ++ ++ ReduceSumDescriptor desc; ++ desc.m_KeepDims = options->keep_dims; ++ desc.m_Axis = axisBufferPtr->data.data()[4]; ++ desc.m_TargetHeight = shape[1]; ++ desc.m_TargetWidth = shape[2]; ++ desc.m_DataLayout = armnn::DataLayout::NHWC; ++ ++ // Register a new layer object, ReduceMax, to in-memory network of ARMNN. ++ IConnectableLayer *layer = m_Network->AddReduceSumLayer(desc, layerName.c_str()); ++ ++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]); ++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); ++ ++ // Register input tensor to the layer. ++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]}); ++ ++ // Register output tensor to the layer. 
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); ++} ++ + armnn::IConnectableLayer* TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer, + unsigned int outputSlot, + tflite::ActivationFunctionType activationType) +diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp +index 478edad..13d1cb4 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.hpp ++++ b/src/armnnTfLiteParser/TfLiteParser.hpp +@@ -134,6 +134,7 @@ private: + void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex); + void ParseUnpack(size_t subgraphIndex, size_t operatorIndex); + void ParseArgMax(size_t subgraphIndex, size_t operatorIndex); ++ void ParseSum(size_t subgraphIndex, size_t operatorIndex); + + void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot); + void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot); +diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp +index c55f51d..245b165 100644 +--- a/src/backends/backendsCommon/LayerSupportBase.cpp ++++ b/src/backends/backendsCommon/LayerSupportBase.cpp +@@ -615,4 +615,12 @@ bool LayerSupportBase::IsTransposeSupported(const TensorInfo& /*input*/, + return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); + } + ++bool LayerSupportBase::IsReduceSumSupported(const TensorInfo& /*input*/, ++ const TensorInfo& /*output*/, ++ const ReduceSumDescriptor& /*descriptor*/, ++ Optional<std::string&> reasonIfUnsupported) const ++{ ++ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); ++} ++ + } // namespace armnn +diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp +index fcc3326..9b39f8f 100644 +--- a/src/backends/backendsCommon/LayerSupportBase.hpp ++++ b/src/backends/backendsCommon/LayerSupportBase.hpp +@@ -377,6 +377,10 @@ public: + const TransposeDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + ++ bool IsReduceSumSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceSumDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + }; + + } // namespace armnn +diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp +index 69bebbc..7455ab5 100644 +--- a/src/backends/backendsCommon/WorkloadData.cpp ++++ b/src/backends/backendsCommon/WorkloadData.cpp +@@ -3483,4 +3483,31 @@ void ElementwiseUnaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) + ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); + } + ++void ReduceSumQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const ++{ ++ const std::string descriptorName{"ReduceSumQueueDescriptor"}; ++ ++ ValidateNumInputs(workloadInfo, descriptorName, 1); ++ ValidateNumOutputs(workloadInfo, descriptorName, 1); ++ ++ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; ++ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; ++ ++ ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input"); ++ ++ std::vector<DataType> supportedTypes = ++ { ++ DataType::BFloat16, ++ DataType::Float16, ++ 
DataType::Float32, ++ DataType::QAsymmS8, ++ DataType::QAsymmU8, ++ DataType::QSymmS16, ++ DataType::Signed32 ++ }; ++ ++ ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); ++ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); ++} ++ + } // namespace armnn +diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp +index 9239f4a..6f203b5 100644 +--- a/src/backends/backendsCommon/WorkloadData.hpp ++++ b/src/backends/backendsCommon/WorkloadData.hpp +@@ -634,4 +634,9 @@ struct ElementwiseUnaryQueueDescriptor : QueueDescriptorWithParameters<Elementwi + void Validate(const WorkloadInfo& workloadInfo) const; + }; + ++struct ReduceSumQueueDescriptor : QueueDescriptorWithParameters<ReduceSumDescriptor> ++{ ++ void Validate(const WorkloadInfo& workloadInfo) const; ++}; ++ + } // namespace armnn +diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp +index 34bfd7c..b7195f7 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.cpp ++++ b/src/backends/backendsCommon/WorkloadFactory.cpp +@@ -1178,6 +1178,18 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, + + break; + } ++ case LayerType::ReduceSum: ++ { ++ auto cLayer = PolymorphicDowncast<const ReduceSumLayer*>(&layer); ++ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); ++ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); ++ ++ result = layerSupportObject->IsReduceSumSupported(OverrideDataType(input, dataType), ++ OverrideDataType(output, dataType), ++ cLayer->GetParameters(), ++ reason); ++ break; ++ } + default: + { + ARMNN_ASSERT_MSG(false, "WorkloadFactory did not recognise type of layer."); +@@ -1584,4 +1596,10 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateTransposeConvolution2d( + return std::unique_ptr<IWorkload>(); + } + ++std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& /*descriptor*/, ++ const WorkloadInfo& /*info*/) const ++{ ++ return std::unique_ptr<IWorkload>(); ++} ++ + } // namepsace armnn +diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp +index 98a6c36..0d98c92 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.hpp ++++ b/src/backends/backendsCommon/WorkloadFactory.hpp +@@ -250,6 +250,8 @@ public: + virtual std::unique_ptr<IWorkload> CreateTransposeConvolution2d( + const TransposeConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const; ++ virtual std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, ++ const WorkloadInfo& info) const; + }; + + } // namespace armnn +diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp +index 034cd12..333ad4d 100644 +--- a/src/backends/reference/RefLayerSupport.cpp ++++ b/src/backends/reference/RefLayerSupport.cpp +@@ -2132,4 +2132,34 @@ bool RefLayerSupport::IsTransposeSupported(const TensorInfo& input, + return supported; + } + ++bool RefLayerSupport::IsReduceSumSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceSumDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported) const ++{ ++ IgnoreUnused(descriptor); ++ bool supported = true; ++ std::array<DataType,7> supportedTypes = ++ { ++ DataType::BFloat16, ++ DataType::Float32, ++ DataType::Float16, ++ DataType::QAsymmS8, ++ DataType::QAsymmU8, ++ 
DataType::QSymmS16, ++ DataType::Signed32 ++ }; ++ ++ supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported, ++ "Reference ReduceSum: input type not supported"); ++ ++ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported, ++ "Reference ReduceSum: output type not supported"); ++ ++ supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported, ++ "Reference ReduceSum: input and output types not matching"); ++ ++ return supported; ++} ++ + } // namespace armnn +diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp +index eb89946..766ddfa 100644 +--- a/src/backends/reference/RefLayerSupport.hpp ++++ b/src/backends/reference/RefLayerSupport.hpp +@@ -342,6 +342,10 @@ public: + const TransposeDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + ++ bool IsReduceSumSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceSumDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + }; + + } // namespace armnn +diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp +index 5ce997c..9602df5 100644 +--- a/src/backends/reference/RefWorkloadFactory.cpp ++++ b/src/backends/reference/RefWorkloadFactory.cpp +@@ -632,4 +632,10 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d( + return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info); + } + ++std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, ++ const WorkloadInfo& info) const ++{ ++ return std::make_unique<RefReduceSumWorkload>(descriptor, info); ++} ++ + } // namespace armnn +diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp +index 1c607c0..93cab9a 100644 +--- a/src/backends/reference/RefWorkloadFactory.hpp ++++ b/src/backends/reference/RefWorkloadFactory.hpp +@@ -250,6 +250,10 @@ public: + std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + ++ std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, ++ const WorkloadInfo& info) const override; ++ ++ + private: + template <typename F32Workload, typename U8Workload, typename QueueDescriptorType> + std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const; +diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt +index a8ddd1d..d5eceff 100644 +--- a/src/backends/reference/workloads/CMakeLists.txt ++++ b/src/backends/reference/workloads/CMakeLists.txt +@@ -49,6 +49,8 @@ list(APPEND armnnRefBackendWorkloads_sources + Minimum.hpp + Pad.cpp + Pad.hpp ++ ReduceSum.cpp ++ ReduceSum.hpp + Pooling2d.cpp + Pooling2d.hpp + PreluImpl.cpp +@@ -172,6 +174,8 @@ list(APPEND armnnRefBackendWorkloads_sources + TensorBufferArrayView.hpp + TransposeConvolution2d.cpp + TransposeConvolution2d.hpp ++ RefReduceSumWorkload.cpp ++ RefReduceSumWorkload.hpp + ) + + add_library(armnnRefBackendWorkloads OBJECT ${armnnRefBackendWorkloads_sources}) +diff --git a/src/backends/reference/workloads/ReduceSum.cpp b/src/backends/reference/workloads/ReduceSum.cpp +new file mode 100644 +index 0000000..475e163 +--- /dev/null ++++ 
b/src/backends/reference/workloads/ReduceSum.cpp +@@ -0,0 +1,70 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReduceSum.hpp" ++ ++#include <armnnUtils/TensorUtils.hpp> ++ ++#include <boost/numeric/conversion/cast.hpp> ++#include <iostream> ++ ++namespace armnn ++{ ++ ++void ReduceSum(Decoder<float>& in, float *out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, int32_t axis) ++{ ++ IgnoreUnused(outputTensorInfo); ++ ++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis); ++ const unsigned int batchSize = inputTensorInfo.GetShape()[0]; ++ const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis); ++ const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis]; ++ const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), ++ uAxis + 1, ++ inputTensorInfo.GetNumDimensions()); ++ ++ // Workaround code, and it is performed if below condition is met, ++ // batch size > 0 and ++ // axis == 2 and ++ // input shape size == 4 ++ if (batchSize > 0 && uAxis == 2 && inputTensorInfo.GetShape().GetNumDimensions() == 4) { ++ unsigned int height = inputTensorInfo.GetShape()[1]; ++ unsigned int width = inputTensorInfo.GetShape()[2]; ++ unsigned int channel = inputTensorInfo.GetShape()[3]; ++ float sumValue = 0.0f; ++ ++ for (unsigned int b = 0; b < batchSize; ++b) { ++ for (unsigned int c = 0; c < channel; ++c) { ++ for (unsigned int y = 0; y < height; ++y) { ++ for (unsigned int x = 0; x < width; ++x) { ++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c]; ++ sumValue += in.Get(); ++ } ++ } ++ ++ out[b * channel + c] = sumValue; ++ sumValue = 0.0f; ++ } ++ } ++ ++ return; ++ } ++ ++ for (unsigned int outer = 0; outer < outerElements; ++outer) { ++ for (unsigned int inner = 0; inner < innerElements; ++inner) { ++ in[outer * axisSize * innerElements + inner]; ++ auto tmpValue = in.Get(); ++ for (unsigned int i = 1; i < axisSize; ++i) { ++ in[(outer * axisSize * innerElements) + (i * innerElements) + inner]; ++ const auto& value = in.Get(); ++ tmpValue += value; ++ } ++ out[outer * innerElements + inner] = tmpValue; ++ } ++ } ++} ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/ReduceSum.hpp b/src/backends/reference/workloads/ReduceSum.hpp +new file mode 100644 +index 0000000..cfaf347 +--- /dev/null ++++ b/src/backends/reference/workloads/ReduceSum.hpp +@@ -0,0 +1,20 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include "armnn/Tensor.hpp" ++#include "armnn/Descriptors.hpp" ++ ++#include "Decoders.hpp" ++ ++namespace armnn ++{ ++ ++void ReduceSum(Decoder<float>& in, float* out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, int32_t axis); ++ ++} //namespace armnn ++ +diff --git a/src/backends/reference/workloads/RefReduceSumWorkload.cpp b/src/backends/reference/workloads/RefReduceSumWorkload.cpp +new file mode 100644 +index 0000000..4ea91a6 +--- /dev/null ++++ b/src/backends/reference/workloads/RefReduceSumWorkload.cpp +@@ -0,0 +1,35 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#include "RefReduceSumWorkload.hpp" ++ ++#include "ReduceSum.hpp" ++#include "RefWorkloadUtils.hpp" ++#include "BaseIterator.hpp" ++#include "Profiling.hpp" ++ ++#include "BaseIterator.hpp" ++#include "Decoders.hpp" ++#include "Encoders.hpp" ++ ++namespace armnn ++{ ++ ++void RefReduceSumWorkload::Execute() const ++{ ++ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceSumWorkload_Execute"); ++ ++ const TensorInfo& inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); ++ const TensorInfo& outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); ++ ++ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map()); ++ Decoder<float> &decoder = *decoderPtr; ++ ++ float *output = GetOutputTensorData<float>(0, m_Data); ++ ++ ReduceSum(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Axis); ++} ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/RefReduceSumWorkload.hpp b/src/backends/reference/workloads/RefReduceSumWorkload.hpp +new file mode 100644 +index 0000000..102b9f3 +--- /dev/null ++++ b/src/backends/reference/workloads/RefReduceSumWorkload.hpp +@@ -0,0 +1,21 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include <backendsCommon/Workload.hpp> ++#include <backendsCommon/WorkloadData.hpp> ++ ++namespace armnn ++{ ++ ++class RefReduceSumWorkload : public BaseWorkload<ReduceSumQueueDescriptor> ++{ ++public: ++ using BaseWorkload<ReduceSumQueueDescriptor>::BaseWorkload; ++ virtual void Execute() const override; ++}; ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp +index e396a6b..c80ed43 100644 +--- a/src/backends/reference/workloads/RefWorkloads.hpp ++++ b/src/backends/reference/workloads/RefWorkloads.hpp +@@ -67,3 +67,4 @@ + #include "Softmax.hpp" + #include "Splitter.hpp" + #include "TensorBufferArrayView.hpp" ++#include "RefReduceSumWorkload.hpp" +-- +2.7.4 + diff --git a/packaging/0003-backends-test-Add-ReduceSum-test-cases.patch b/packaging/0003-backends-test-Add-ReduceSum-test-cases.patch new file mode 100644 index 000000000..989be5b57 --- /dev/null +++ b/packaging/0003-backends-test-Add-ReduceSum-test-cases.patch @@ -0,0 +1,399 @@ +From 7a6c7409021a64749b8792ea069d81463c5ee98c Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Mon, 7 Sep 2020 20:17:38 +0900 +Subject: [PATCH 03/10] backends/test: Add ReduceSum test cases + +Change-Id: Ic6d02e0e51908958cd1499f0d0f22146763574ee +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + src/backends/backendsCommon/test/CMakeLists.txt | 2 + + src/backends/backendsCommon/test/LayerTests.hpp | 1 + + .../test/layerTests/ReduceSumTestImpl.cpp | 293 +++++++++++++++++++++ + .../test/layerTests/ReduceSumTestImpl.hpp | 33 +++ + src/backends/reference/test/RefLayerTests.cpp | 7 + + 5 files changed, 336 insertions(+) + create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp + create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp + +diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt +index dd96d36..951a46d 100644 +--- a/src/backends/backendsCommon/test/CMakeLists.txt ++++ b/src/backends/backendsCommon/test/CMakeLists.txt +@@ -127,6 +127,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources + 
layerTests/PreluTestImpl.hpp + layerTests/QuantizeTestImpl.cpp + layerTests/QuantizeTestImpl.hpp ++ layerTests/ReduceSumTestImpl.cpp ++ layerTests/ReduceSumTestImpl.hpp + layerTests/ReshapeTestImpl.cpp + layerTests/ReshapeTestImpl.hpp + layerTests/ResizeTestImpl.cpp +diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp +index 247ed12..25f411f 100644 +--- a/src/backends/backendsCommon/test/LayerTests.hpp ++++ b/src/backends/backendsCommon/test/LayerTests.hpp +@@ -44,6 +44,7 @@ + #include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp> + #include <backendsCommon/test/layerTests/PreluTestImpl.hpp> + #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp> ++#include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp> + #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp> + #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp> + #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp> +diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp +new file mode 100644 +index 0000000..4d698df +--- /dev/null ++++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp +@@ -0,0 +1,293 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReduceSumTestImpl.hpp" ++ ++#include <backendsCommon/test/DataTypeUtils.hpp> ++#include <backendsCommon/test/TensorCopyUtils.hpp> ++#include <backendsCommon/test/WorkloadTestUtils.hpp> ++ ++#include <test/TensorHelpers.hpp> ++ ++namespace ++{ ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceSumTestCommon( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, ++ const armnn::TensorInfo inputTensorInfo, ++ const armnn::TensorInfo outputTensorInfo, ++ const std::vector<float>& inputData, ++ const std::vector<float>& outputData, ++ int axis = 3) ++{ ++ IgnoreUnused(memoryManager); ++ auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo)); ++ ++ LayerTestResult<float, 4> result(outputTensorInfo); ++ result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData); ++ ++ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); ++ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); ++ ++ armnn::ReduceSumQueueDescriptor descriptor; ++ unsigned int updated_idx = static_cast<uint32_t>(axis); ++ if (axis < 0) { ++ updated_idx = static_cast<uint32_t>(static_cast<int32_t>(inputTensorInfo.GetNumDimensions()) + axis); ++ } ++ ++ descriptor.m_Parameters.m_Axis = updated_idx; ++ descriptor.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW; ++ armnn::WorkloadInfo info; ++ ++ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); ++ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); ++ ++ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduceSum(descriptor, info); ++ ++ inputHandle->Allocate(); ++ outputHandle->Allocate(); ++ ++ CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin()); ++ ++ workload->Execute(); ++ ++ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get()); ++ ++ return result; ++} ++ ++} // namespace ++ ++template<armnn::DataType 
ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceSumSimpleTest( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 1, 1, 5 }; ++ const armnn::TensorShape outputShape{ 1, 1, 1, 1}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f }); ++ std::vector<float> outputValues({ 34.0f }); ++ ++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, -1); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceSumMultiChannel_1Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 }; ++ const armnn::TensorShape outputShape{ 1, 1, 2, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ std::vector<float> outputValues({ 111.0f, 222.0f, 333.0f, 444.0f, ++ 555.0f, 666.0f, 777.0f, 888.0f }); ++ ++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 1); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceSumMultiChannel_2Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 }; ++ const armnn::TensorShape outputShape{ 1, 1, 1, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ std::vector<float> outputValues({ 666.0f, 888.0f, 1110.0f, 1332.0f }); ++ ++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 2); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannelTest( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 3, 2, 3, 4 }; ++ const armnn::TensorShape outputShape{ 3, 1, 1, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ 
inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues( {7, 8, 6, 1, ++ 1, 1, 8, 7, ++ 3, 7, 7, 7, ++ ++ 6, 8, 4, 7, ++ 3, 8, 7, 3, ++ 5, 8, 8, 8, ++ ++ ++ 7, 8, 2, 7, ++ 3, 8, 5, 6, ++ 8, 4, 2, 7, ++ ++ 1, 6, 7, 2, ++ 8, 3, 3, 1, ++ 7, 6, 2, 6, ++ ++ ++ 5, 3, 4, 8, ++ 7, 8, 2, 4, ++ 6, 6, 2, 8, ++ ++ 2, 2, 7, 2, ++ 5, 3, 6, 3, ++ 6, 1, 8, 8}); ++ std::vector<float> outputValues({ 25.0f, 40.0f, 40.0f, 33.0f, ++ 34.0f, 35.0f, 21.0f, 29.0f, ++ 31.0f, 23.0f, 29.0f, 33.0f}); ++ ++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 2); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannel_2Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 3, 2, 3, 4 }; ++ const armnn::TensorShape outputShape{ 3, 2, 3, 1}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues( {7, 8, 6, 1, ++ 1, 1, 8, 7, ++ 3, 7, 7, 7, ++ ++ 6, 8, 4, 7, ++ 3, 8, 7, 3, ++ 5, 8, 8, 8, ++ ++ ++ 7, 8, 2, 7, ++ 3, 8, 5, 6, ++ 8, 4, 2, 7, ++ ++ 1, 6, 7, 2, ++ 8, 3, 3, 1, ++ 7, 6, 2, 6, ++ ++ ++ 5, 3, 4, 8, ++ 7, 8, 2, 4, ++ 6, 6, 2, 8, ++ ++ 2, 2, 7, 2, ++ 5, 3, 6, 3, ++ 6, 1, 8, 8}); ++ std::vector<float> outputValues({ 22.0f, 17.0f, 24.0f, ++ 25.0f, 21.0f, 29.0f, ++ ++ 24.0f, 22.0f, 21.0f, ++ 16.0f, 15.0f, 21.0f, ++ ++ 20.0f, 21.0f, 22.0f, ++ 13.0f, 17.0f, 23.0f}); ++ ++ return ReduceSumTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 3); ++} ++ ++ ++// Explicit template specializations ++ ++template LayerTestResult<float, 4> ++ReduceSumSimpleTest<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReduceSumMultiChannel_1Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReduceSumMultiChannel_2Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReduceSumMultiBatchAndChannelTest<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReduceSumMultiBatchAndChannel_2Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp +new file mode 100644 +index 0000000..01d1a44 +--- /dev/null ++++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.hpp +@@ -0,0 +1,33 @@ ++// ++// Copyright © 2019 Arm 
Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include "LayerTestResult.hpp" ++ ++#include <ResolveType.hpp> ++ ++#include <armnn/backends/IBackendInternal.hpp> ++#include <backendsCommon/WorkloadFactory.hpp> ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceSumSimpleTest(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceSumMultiChannel_1Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceSumMultiChannel_2Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannelTest(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceSumMultiBatchAndChannel_2Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp +index d8dab3d..9461e2a 100644 +--- a/src/backends/reference/test/RefLayerTests.cpp ++++ b/src/backends/reference/test/RefLayerTests.cpp +@@ -1980,4 +1980,11 @@ ARMNN_AUTO_TEST_CASE(Neg3dQuantisedAsymm8, Neg3dTest<DataType::QAsymmU8>) + ARMNN_AUTO_TEST_CASE(Neg2dQuantisedSymm16, Neg2dTest<DataType::QSymmS16>) + ARMNN_AUTO_TEST_CASE(Neg3dQuantisedSymm16, Neg3dTest<DataType::QSymmS16>) + ++// ReduceSum ++ARMNN_AUTO_TEST_CASE(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannelFloat32, ReduceSumMultiChannel_1Test<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannel2Float32, ReduceSumMultiChannel_2Test<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannelFloat32, ReduceSumMultiBatchAndChannelTest<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannel_2Float32, ReduceSumMultiBatchAndChannel_2Test<DataType::Float32>) ++ + BOOST_AUTO_TEST_SUITE_END() +-- +2.7.4 + diff --git a/packaging/0004-backends-reference-Add-ReverseV2-op-support.patch b/packaging/0004-backends-reference-Add-ReverseV2-op-support.patch new file mode 100644 index 000000000..351923b31 --- /dev/null +++ b/packaging/0004-backends-reference-Add-ReverseV2-op-support.patch @@ -0,0 +1,912 @@ +From 8b90c253ef5bf33537a40d73492c6fe320d03546 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Tue, 8 Sep 2020 10:31:11 +0900 +Subject: [PATCH 04/10] backends/reference: Add ReverseV2 op support + +Change-Id: I0cb1a6fe670e5ff5f9b21b62ff03d3579b956ef7 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + CMakeLists.txt | 2 + + include/armnn/Descriptors.hpp | 28 ++++++++ + include/armnn/DescriptorsFwd.hpp | 1 + + include/armnn/ILayerSupport.hpp | 5 ++ + include/armnn/ILayerVisitor.hpp | 8 +++ + include/armnn/INetwork.hpp | 7 ++ + include/armnn/LayerVisitorBase.hpp | 4 ++ + src/armnn/InternalTypes.hpp | 1 + 
+ src/armnn/LayersFwd.hpp | 2 + + src/armnn/Network.cpp | 6 ++ + src/armnn/Network.hpp | 3 + + src/armnn/layers/ReverseV2Layer.cpp | 75 ++++++++++++++++++++ + src/armnn/layers/ReverseV2Layer.hpp | 48 +++++++++++++ + src/armnnTfLiteParser/TfLiteParser.cpp | 41 +++++++++++ + src/armnnTfLiteParser/TfLiteParser.hpp | 1 + + src/backends/backendsCommon/LayerSupportBase.cpp | 8 +++ + src/backends/backendsCommon/LayerSupportBase.hpp | 5 ++ + src/backends/backendsCommon/WorkloadData.cpp | 49 +++++++++++++ + src/backends/backendsCommon/WorkloadData.hpp | 5 ++ + src/backends/backendsCommon/WorkloadFactory.cpp | 18 +++++ + src/backends/backendsCommon/WorkloadFactory.hpp | 4 ++ + src/backends/reference/RefLayerSupport.cpp | 29 ++++++++ + src/backends/reference/RefLayerSupport.hpp | 5 ++ + src/backends/reference/RefWorkloadFactory.cpp | 6 ++ + src/backends/reference/RefWorkloadFactory.hpp | 3 + + src/backends/reference/workloads/CMakeLists.txt | 4 ++ + .../reference/workloads/RefReverseV2Workload.cpp | 35 ++++++++++ + .../reference/workloads/RefReverseV2Workload.hpp | 21 ++++++ + src/backends/reference/workloads/RefWorkloads.hpp | 1 + + src/backends/reference/workloads/ReverseV2.cpp | 80 ++++++++++++++++++++++ + src/backends/reference/workloads/ReverseV2.hpp | 20 ++++++ + 31 files changed, 525 insertions(+) + create mode 100644 src/armnn/layers/ReverseV2Layer.cpp + create mode 100644 src/armnn/layers/ReverseV2Layer.hpp + create mode 100644 src/backends/reference/workloads/RefReverseV2Workload.cpp + create mode 100644 src/backends/reference/workloads/RefReverseV2Workload.hpp + create mode 100644 src/backends/reference/workloads/ReverseV2.cpp + create mode 100644 src/backends/reference/workloads/ReverseV2.hpp + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 962dc2d..52c8785 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -380,6 +380,8 @@ list(APPEND armnn_sources + src/armnn/layers/TransposeConvolution2dLayer.hpp + src/armnn/layers/TransposeLayer.hpp + src/armnn/layers/TransposeLayer.cpp ++ src/armnn/layers/ReverseV2Layer.hpp ++ src/armnn/layers/ReverseV2Layer.cpp + src/armnn/layers/ReduceSumLayer.hpp + src/armnn/layers/ReduceSumLayer.cpp + src/armnn/BackendRegistry.cpp +diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp +index 3651c20..c7123f1 100644 +--- a/include/armnn/Descriptors.hpp ++++ b/include/armnn/Descriptors.hpp +@@ -1215,6 +1215,34 @@ struct TransposeDescriptor + PermutationVector m_DimMappings; + }; + ++/// A ReverseV2Descriptor for the ReverseV2. ++struct ReverseV2Descriptor ++{ ++ ReverseV2Descriptor() ++ : m_TargetWidth(0) ++ , m_TargetHeight(0) ++ , m_Axis(0) ++ , m_DataLayout(DataLayout::NCHW) ++ {} ++ ++ bool operator ==(const ReverseV2Descriptor& rhs) const ++ { ++ return m_TargetWidth == rhs.m_TargetWidth && ++ m_TargetHeight == rhs.m_TargetHeight && ++ m_Axis == rhs.m_Axis && ++ m_DataLayout == rhs.m_DataLayout; ++ } ++ ++ /// Target width value. ++ uint32_t m_TargetWidth; ++ /// Target height value. ++ uint32_t m_TargetHeight; ++ /// The indices of the dimensions to reverse. ++ int32_t m_Axis; ++ /// The data layout to be used (NCHW, NHWC). ++ DataLayout m_DataLayout; ++}; ++ + /// A ReduceSumDescriptor for the REDUCE SUM. 
+ struct ReduceSumDescriptor + { +diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp +index cef85d5..38e74cd 100644 +--- a/include/armnn/DescriptorsFwd.hpp ++++ b/include/armnn/DescriptorsFwd.hpp +@@ -33,6 +33,7 @@ struct QLstmDescriptor; + struct ReshapeDescriptor; + struct ResizeBilinearDescriptor; + struct ResizeDescriptor; ++struct ReverseV2Descriptor; + struct ReduceSumDescriptor; + struct SoftmaxDescriptor; + struct SpaceToBatchNdDescriptor; +diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp +index 0701790..670c856 100644 +--- a/include/armnn/ILayerSupport.hpp ++++ b/include/armnn/ILayerSupport.hpp +@@ -392,6 +392,11 @@ public: + const TransposeDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0; + ++ virtual bool IsReverseV2Supported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReverseV2Descriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0; ++ + virtual bool IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp +index cd57275..a40dbae 100644 +--- a/include/armnn/ILayerVisitor.hpp ++++ b/include/armnn/ILayerVisitor.hpp +@@ -403,6 +403,14 @@ public: + const ResizeDescriptor& resizeDescriptor, + const char* name = nullptr) = 0; + ++ /// Function that a reversev2 layer should call back to when its Accept(ILayerVisitor&) function is invoked. ++ /// @param layer - pointer to the layer which is calling back to this visit function. ++ /// @param reversev2Descriptor - Parameters for the reversev2 operation. ++ /// @param name - Optional name for the layer. ++ virtual void VisitReverseV2Layer(const IConnectableLayer* layer, ++ const ReverseV2Descriptor& reversev2Descriptor, ++ const char* name = nullptr) = 0; ++ + /// Function that a reduce_sum layer should call back to when its Accept(ILayerVisitor&) function is invoked. + /// @param layer - pointer to the layer which is calling back to this visit function. + /// @param ReduceSumDescriptor - Parameters for the reduce max operation. +diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp +index 79ad686..6678a1c 100644 +--- a/include/armnn/INetwork.hpp ++++ b/include/armnn/INetwork.hpp +@@ -360,6 +360,13 @@ public: + virtual IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor, + const char* name = nullptr) = 0; + ++ /// Adds a reversev2 layer to the network. ++ /// @param reversev2Descriptor - Parameters for the reversev2 operation. ++ /// @param name - Optional name for the layer. ++ /// @return - Interface for configuring the layer. ++ virtual IConnectableLayer* AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor, ++ const char* name = nullptr) = 0; ++ + /// Adds a reducemax layer to the network. + /// @param ReduceSumDescriptor - Parameters for the reducemax operation. + /// @param name - Optional name for the layer. 
+diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp +index 209ef2c..80d4dfb 100644 +--- a/include/armnn/LayerVisitorBase.hpp ++++ b/include/armnn/LayerVisitorBase.hpp +@@ -204,6 +204,10 @@ public: + const ResizeDescriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } + ++ void VisitReverseV2Layer(const IConnectableLayer*, ++ const ReverseV2Descriptor&, ++ const char*) override { DefaultPolicy::Apply(__func__); } ++ + void VisitReduceSumLayer(const IConnectableLayer*, + const ReduceSumDescriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } +diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp +index 5f5ee01..e523d52 100644 +--- a/src/armnn/InternalTypes.hpp ++++ b/src/armnn/InternalTypes.hpp +@@ -72,6 +72,7 @@ + X(Switch) \ + X(Transpose) \ + X(TransposeConvolution2d) \ ++ X(ReverseV2) \ + X(ReduceSum) + + /// When adding a new layer, adapt also the LastLayer enum value in the +diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp +index 5092828..7ac517c 100644 +--- a/src/armnn/LayersFwd.hpp ++++ b/src/armnn/LayersFwd.hpp +@@ -54,6 +54,7 @@ + #include "layers/QuantizedLstmLayer.hpp" + #include "layers/ReshapeLayer.hpp" + #include "layers/ResizeLayer.hpp" ++#include "layers/ReverseV2Layer.hpp" + #include "layers/ReduceSumLayer.hpp" + #include "layers/SliceLayer.hpp" + #include "layers/SoftmaxLayer.hpp" +@@ -143,6 +144,7 @@ DECLARE_LAYER(QLstm) + DECLARE_LAYER(QuantizedLstm) + DECLARE_LAYER(Reshape) + DECLARE_LAYER(Resize) ++DECLARE_LAYER(ReverseV2) + DECLARE_LAYER(ReduceSum) + DECLARE_LAYER(Slice) + DECLARE_LAYER(Softmax) +diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp +index 335e104..bc6738e 100644 +--- a/src/armnn/Network.cpp ++++ b/src/armnn/Network.cpp +@@ -1472,6 +1472,12 @@ resizeDescriptor, const char* name) + return m_Graph->AddLayer<ResizeLayer>(resizeDescriptor, name); + } + ++IConnectableLayer* Network::AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor, ++ const char* name) ++{ ++ return m_Graph->AddLayer<ReverseV2Layer>(reversev2Descriptor, name); ++} ++ + IConnectableLayer* Network::AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor, + const char* name) + { +diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp +index 6c767f3..95d235e 100644 +--- a/src/armnn/Network.hpp ++++ b/src/armnn/Network.hpp +@@ -160,6 +160,9 @@ public: + IConnectableLayer* AddResizeLayer(const ResizeDescriptor& resizeDescriptor, + const char* name = nullptr) override; + ++ IConnectableLayer* AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor, ++ const char* name = nullptr) override; ++ + IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor, + const char* name = nullptr) override; + +diff --git a/src/armnn/layers/ReverseV2Layer.cpp b/src/armnn/layers/ReverseV2Layer.cpp +new file mode 100644 +index 0000000..0921a3d +--- /dev/null ++++ b/src/armnn/layers/ReverseV2Layer.cpp +@@ -0,0 +1,75 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReverseV2Layer.hpp" ++#include "LayerCloneBase.hpp" ++ ++#include <armnn/TypesUtils.hpp> ++ ++#include <armnnUtils/DataLayoutIndexed.hpp> ++ ++#include <backendsCommon/WorkloadData.hpp> ++#include <backendsCommon/WorkloadFactory.hpp> ++ ++using namespace armnnUtils; ++ ++namespace armnn ++{ ++ ++ReverseV2Layer::ReverseV2Layer(const ReverseV2Descriptor& param, const char* name) ++ : LayerWithParameters(1, 1, LayerType::ReverseV2, param, name) ++{ ++} ++ ++std::unique_ptr<IWorkload> ReverseV2Layer::CreateWorkload(const IWorkloadFactory& factory) const ++{ ++ ReverseV2QueueDescriptor descriptor; ++ return factory.CreateReverseV2(descriptor, PrepInfoAndDesc(descriptor)); ++} ++ ++ReverseV2Layer* ReverseV2Layer::Clone(Graph& graph) const ++{ ++ return CloneBase<ReverseV2Layer>(graph, m_Param, GetName()); ++} ++ ++std::vector<TensorShape> ReverseV2Layer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const ++{ ++ ARMNN_ASSERT(inputShapes.size() == 1); ++ ++ const TensorShape& inputShape = inputShapes[0]; ++ const DataLayoutIndexed dimensionIndices = m_Param.m_DataLayout; ++ ++ unsigned int outWidth = m_Param.m_TargetWidth; ++ unsigned int outHeight = m_Param.m_TargetHeight; ++ unsigned int outChannels = inputShape[dimensionIndices.GetChannelsIndex()]; ++ unsigned int outBatch = inputShape[0]; ++ ++ TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ? ++ TensorShape( { outBatch, outHeight, outWidth, outChannels } ) : ++ TensorShape( { outBatch, outChannels, outHeight, outWidth }); ++ ++ return std::vector<TensorShape>({ tensorShape }); ++} ++ ++void ReverseV2Layer::ValidateTensorShapesFromInputs() ++{ ++ VerifyLayerConnections(1, CHECK_LOCATION()); ++ ++ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() }); ++ ++ ARMNN_ASSERT(inferredShapes.size() == 1); ++ ++ ConditionalThrowIfNotEqual<LayerValidationException>( ++ "ReverseV2Layer: TensorShape set on OutputSlot[0] does not match the inferred shape.", ++ GetOutputSlot(0).GetTensorInfo().GetShape(), ++ inferredShapes[0]); ++} ++ ++void ReverseV2Layer::Accept(ILayerVisitor& visitor) const ++{ ++ visitor.VisitReverseV2Layer(this, GetParameters(), GetName()); ++} ++ ++} // namespace armnn +diff --git a/src/armnn/layers/ReverseV2Layer.hpp b/src/armnn/layers/ReverseV2Layer.hpp +new file mode 100644 +index 0000000..65acdfb +--- /dev/null ++++ b/src/armnn/layers/ReverseV2Layer.hpp +@@ -0,0 +1,48 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++#pragma once ++ ++#include "LayerWithParameters.hpp" ++ ++namespace armnn ++{ ++ ++/// This layer represents a reversev2 operation. ++class ReverseV2Layer : public LayerWithParameters<ReverseV2Descriptor> ++{ ++public: ++ /// Makes a workload for the ReverseV2 type. ++ /// @param [in] graph The graph where this layer can be found. ++ /// @param [in] factory The workload factory which will create the workload. ++ /// @return A pointer to the created workload, or nullptr if not created. ++ virtual std::unique_ptr<IWorkload>CreateWorkload(const IWorkloadFactory& factory) const override; ++ ++ /// Creates a dynamically-allocated copy of this layer. ++ /// @param [in] graph The graph into which this layer is being cloned. ++ ReverseV2Layer* Clone(Graph& graph) const override; ++ ++ /// Check if the input tensor shape(s) ++ /// will lead to a valid configuration of @ref ReverseV2Layer. 
++ void ValidateTensorShapesFromInputs() override; ++ ++ /// By default returns inputShapes if the number of inputs are equal to number of outputs, ++ /// otherwise infers the output shapes from given input shapes and layer properties. ++ /// @param [in] inputShapes The input shapes layer has. ++ /// @return A vector to the inferred output shape. ++ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override; ++ ++ void Accept(ILayerVisitor& visitor) const override; ++ ++protected: ++ /// Constructor to create a ReverseV2Layer. ++ /// @param [in] param ReverseV2Descriptor to configure the reversev2 operation. ++ /// @param [in] name Optional name for the layer. ++ ReverseV2Layer(const ReverseV2Descriptor& param, const char* name); ++ ++ /// Default destructor ++ ~ReverseV2Layer() = default; ++}; ++ ++} // namespace armnn +diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp +index e5400dc..3da7288 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.cpp ++++ b/src/armnnTfLiteParser/TfLiteParser.cpp +@@ -531,6 +531,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o + m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE] = &TfLiteParser::ParseTranspose; + m_ParserFunctions[tflite::BuiltinOperator_TRANSPOSE_CONV] = &TfLiteParser::ParseTransposeConv; + m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack; ++ m_ParserFunctions[tflite::BuiltinOperator_REVERSE_V2] = &TfLiteParser::ParseReverse_v2; + m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax; + m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum; + +@@ -2732,6 +2733,46 @@ void TfLiteParser::ParseSplitV(size_t subgraphIndex, size_t operatorIndex) + RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); + } + ++void TfLiteParser::ParseReverse_v2(size_t subgraphIndex, size_t operatorIndex) ++{ ++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(inputs.size(), 2); ++ ++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(outputs.size(), 1); ++ ++ auto layerName = boost::str(boost::format("Reverse_v2:%1%:%2%") % subgraphIndex % operatorIndex); ++ ++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]); ++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]); ++ ++ TensorShape shape = sizeTensorInfo0.GetShape(); ++ ++ // Get const axis value from model and set it to descriptor. ++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer); ++ ++ ReverseV2Descriptor desc; ++ desc.m_Axis = axisBufferPtr->data.data()[0]; ++ desc.m_TargetHeight = shape[1]; ++ desc.m_TargetWidth = shape[2]; ++ desc.m_DataLayout = armnn::DataLayout::NHWC; ++ ++ // Register a new layer object, ReverseV2, to in-memory network of ARMNN. ++ IConnectableLayer *layer = m_Network->AddReverseV2Layer(desc, layerName.c_str()); ++ ++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]); ++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); ++ ++ // Register input tensor to the layer. ++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]}); ++ ++ // Register output tensor to the layer. 
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); ++} ++ + void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex) + { + const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex]; +diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp +index 13d1cb4..7970559 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.hpp ++++ b/src/armnnTfLiteParser/TfLiteParser.hpp +@@ -133,6 +133,7 @@ private: + void ParseTranspose(size_t subgraphIndex, size_t operatorIndex); + void ParseTransposeConv(size_t subgraphIndex, size_t operatorIndex); + void ParseUnpack(size_t subgraphIndex, size_t operatorIndex); ++ void ParseReverse_v2(size_t subgraphIndex, size_t operatorIndex); + void ParseArgMax(size_t subgraphIndex, size_t operatorIndex); + void ParseSum(size_t subgraphIndex, size_t operatorIndex); + +diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp +index 245b165..0c1c1e2 100644 +--- a/src/backends/backendsCommon/LayerSupportBase.cpp ++++ b/src/backends/backendsCommon/LayerSupportBase.cpp +@@ -615,6 +615,14 @@ bool LayerSupportBase::IsTransposeSupported(const TensorInfo& /*input*/, + return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); + } + ++bool LayerSupportBase::IsReverseV2Supported(const TensorInfo& /*input*/, ++ const TensorInfo& /*output*/, ++ const ReverseV2Descriptor& /*descriptor*/, ++ Optional<std::string&> reasonIfUnsupported) const ++{ ++ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); ++} ++ + bool LayerSupportBase::IsReduceSumSupported(const TensorInfo& /*input*/, + const TensorInfo& /*output*/, + const ReduceSumDescriptor& /*descriptor*/, +diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp +index 9b39f8f..6f1e6e0 100644 +--- a/src/backends/backendsCommon/LayerSupportBase.hpp ++++ b/src/backends/backendsCommon/LayerSupportBase.hpp +@@ -377,6 +377,11 @@ public: + const TransposeDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + ++ bool IsReverseV2Supported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReverseV2Descriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; ++ + bool IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp +index 7455ab5..dcbec11 100644 +--- a/src/backends/backendsCommon/WorkloadData.cpp ++++ b/src/backends/backendsCommon/WorkloadData.cpp +@@ -3483,6 +3483,55 @@ void ElementwiseUnaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) + ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); + } + ++void ReverseV2QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const ++{ ++ const std::string descriptorName{"ReverseV2QueueDescriptor"}; ++ ++ ValidateNumInputs(workloadInfo, descriptorName, 1); ++ ValidateNumOutputs(workloadInfo, descriptorName, 1); ++ ++ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; ++ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; ++ ++ 
ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input"); ++ ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output"); ++ ++ std::vector<DataType> supportedTypes = ++ { ++ DataType::BFloat16, ++ DataType::Float16, ++ DataType::Float32, ++ DataType::QAsymmS8, ++ DataType::QAsymmU8, ++ DataType::QSymmS16 ++ }; ++ ++ ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); ++ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); ++ ++ // ReverseV2 only changes width and height: batch and channel count must match. ++ const unsigned int inputBatchSize = inputTensorInfo.GetShape()[0]; ++ const unsigned int outputBatchSize = outputTensorInfo.GetShape()[0]; ++ if (inputBatchSize != outputBatchSize) ++ { ++ throw InvalidArgumentException( ++ boost::str(boost::format("%1%: Input batch size (%2%) " ++ "does not match output batch size (%3%)") % ++ descriptorName % inputBatchSize % outputBatchSize)); ++ } ++ ++ DataLayoutIndexed dimensionIndices(m_Parameters.m_DataLayout); ++ const unsigned int inputChannelCount = inputTensorInfo.GetShape()[dimensionIndices.GetChannelsIndex()]; ++ const unsigned int outputChannelCount = outputTensorInfo.GetShape()[dimensionIndices.GetChannelsIndex()]; ++ if (inputChannelCount != outputChannelCount) ++ { ++ throw InvalidArgumentException( ++ boost::str(boost::format("%1%: Input channel count (%2%) " ++ "does not match output channel count (%3%)") % ++ descriptorName % inputChannelCount % outputChannelCount)); ++ } ++} ++ + void ReduceSumQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const + { + const std::string descriptorName{"ReduceSumQueueDescriptor"}; +diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp +index 6f203b5..0cbe8aa 100644 +--- a/src/backends/backendsCommon/WorkloadData.hpp ++++ b/src/backends/backendsCommon/WorkloadData.hpp +@@ -634,6 +634,11 @@ struct ElementwiseUnaryQueueDescriptor : QueueDescriptorWithParameters<Elementwi + void Validate(const WorkloadInfo& workloadInfo) const; + }; + ++struct ReverseV2QueueDescriptor : QueueDescriptorWithParameters<ReverseV2Descriptor> ++{ ++ void Validate(const WorkloadInfo& workloadInfo) const; ++}; ++ + struct ReduceSumQueueDescriptor : QueueDescriptorWithParameters<ReduceSumDescriptor> + { + void Validate(const WorkloadInfo& workloadInfo) const; +diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp +index b7195f7..31e6bfd 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.cpp ++++ b/src/backends/backendsCommon/WorkloadFactory.cpp +@@ -1178,6 +1178,18 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, + + break; + } ++ case LayerType::ReverseV2: ++ { ++ auto cLayer = PolymorphicDowncast<const ReverseV2Layer*>(&layer); ++ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); ++ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); ++ ++ result = layerSupportObject->IsReverseV2Supported(OverrideDataType(input, dataType), ++ OverrideDataType(output, dataType), ++ cLayer->GetParameters(), ++ reason); ++ break; ++ } + case LayerType::ReduceSum: + { + auto cLayer = PolymorphicDowncast<const ReduceSumLayer*>(&layer); +@@ -1596,6 +1608,12 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateTransposeConvolution2d( + return std::unique_ptr<IWorkload>(); + } + ++std::unique_ptr<IWorkload> IWorkloadFactory::CreateReverseV2(const 
ReverseV2QueueDescriptor& /*descriptor*/, ++ const WorkloadInfo& /*info*/) const ++{ ++ return std::unique_ptr<IWorkload>(); ++} ++ + std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& /*descriptor*/, + const WorkloadInfo& /*info*/) const + { +diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp +index 0d98c92..740da18 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.hpp ++++ b/src/backends/backendsCommon/WorkloadFactory.hpp +@@ -250,6 +250,10 @@ public: + virtual std::unique_ptr<IWorkload> CreateTransposeConvolution2d( + const TransposeConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const; ++ ++ virtual std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor, ++ const WorkloadInfo& info) const; ++ + virtual std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, + const WorkloadInfo& info) const; + }; +diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp +index 333ad4d..1ac947e 100644 +--- a/src/backends/reference/RefLayerSupport.cpp ++++ b/src/backends/reference/RefLayerSupport.cpp +@@ -2132,6 +2132,35 @@ bool RefLayerSupport::IsTransposeSupported(const TensorInfo& input, + return supported; + } + ++bool RefLayerSupport::IsReverseV2Supported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReverseV2Descriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported) const ++{ ++ IgnoreUnused(descriptor); ++ bool supported = true; ++ std::array<DataType,6> supportedTypes = ++ { ++ DataType::BFloat16, ++ DataType::Float32, ++ DataType::Float16, ++ DataType::QAsymmS8, ++ DataType::QAsymmU8, ++ DataType::QSymmS16 ++ }; ++ ++ supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported, ++ "Reference ReverseV2: input type not supported"); ++ ++ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported, ++ "Reference ReverseV2: output type not supported"); ++ ++ supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported, ++ "Reference ReverseV2: input and output types not matching"); ++ ++ return supported; ++} ++ + bool RefLayerSupport::IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp +index 766ddfa..cdc2adb 100644 +--- a/src/backends/reference/RefLayerSupport.hpp ++++ b/src/backends/reference/RefLayerSupport.hpp +@@ -342,6 +342,11 @@ public: + const TransposeDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + ++ bool IsReverseV2Supported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReverseV2Descriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; ++ + bool IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp +index 9602df5..7d1e810 100644 +--- a/src/backends/reference/RefWorkloadFactory.cpp ++++ b/src/backends/reference/RefWorkloadFactory.cpp +@@ -632,6 +632,12 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateTransposeConvolution2d( + return std::make_unique<RefTransposeConvolution2dWorkload>(descriptor, info); + } 
+ ++std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReverseV2(const ReverseV2QueueDescriptor& descriptor, ++ const WorkloadInfo& info) const ++{ ++ return std::make_unique<RefReverseV2Workload>(descriptor, info); ++} ++ + std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, + const WorkloadInfo& info) const + { +diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp +index 93cab9a..14df6b8 100644 +--- a/src/backends/reference/RefWorkloadFactory.hpp ++++ b/src/backends/reference/RefWorkloadFactory.hpp +@@ -250,6 +250,9 @@ public: + std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + ++ std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor, ++ const WorkloadInfo& info) const override; ++ + std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + +diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt +index d5eceff..f68a673 100644 +--- a/src/backends/reference/workloads/CMakeLists.txt ++++ b/src/backends/reference/workloads/CMakeLists.txt +@@ -51,6 +51,8 @@ list(APPEND armnnRefBackendWorkloads_sources + Pad.hpp + ReduceSum.cpp + ReduceSum.hpp ++ ReverseV2.cpp ++ ReverseV2.hpp + Pooling2d.cpp + Pooling2d.hpp + PreluImpl.cpp +@@ -174,6 +176,8 @@ list(APPEND armnnRefBackendWorkloads_sources + TensorBufferArrayView.hpp + TransposeConvolution2d.cpp + TransposeConvolution2d.hpp ++ RefReverseV2Workload.cpp ++ RefReverseV2Workload.hpp + RefReduceSumWorkload.cpp + RefReduceSumWorkload.hpp + ) +diff --git a/src/backends/reference/workloads/RefReverseV2Workload.cpp b/src/backends/reference/workloads/RefReverseV2Workload.cpp +new file mode 100644 +index 0000000..73ceba4 +--- /dev/null ++++ b/src/backends/reference/workloads/RefReverseV2Workload.cpp +@@ -0,0 +1,35 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "RefReverseV2Workload.hpp" ++ ++#include "ReverseV2.hpp" ++#include "RefWorkloadUtils.hpp" ++#include "BaseIterator.hpp" ++#include "Profiling.hpp" ++ ++#include "BaseIterator.hpp" ++#include "Decoders.hpp" ++#include "Encoders.hpp" ++ ++namespace armnn ++{ ++ ++void RefReverseV2Workload::Execute() const ++{ ++ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReverseV2Workload_Execute"); ++ ++ const TensorInfo& inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); ++ const TensorInfo& outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); ++ ++ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map()); ++ Decoder<float> &decoder = *decoderPtr; ++ ++ float *output = GetOutputTensorData<float>(0, m_Data); ++ ++ ReverseV2(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Axis); ++} ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/RefReverseV2Workload.hpp b/src/backends/reference/workloads/RefReverseV2Workload.hpp +new file mode 100644 +index 0000000..3c71dfa +--- /dev/null ++++ b/src/backends/reference/workloads/RefReverseV2Workload.hpp +@@ -0,0 +1,21 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include <backendsCommon/Workload.hpp> ++#include <backendsCommon/WorkloadData.hpp> ++ ++namespace armnn ++{ ++ ++class RefReverseV2Workload : public BaseWorkload<ReverseV2QueueDescriptor> ++{ ++public: ++ using BaseWorkload<ReverseV2QueueDescriptor>::BaseWorkload; ++ virtual void Execute() const override; ++}; ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp +index c80ed43..9427d5c 100644 +--- a/src/backends/reference/workloads/RefWorkloads.hpp ++++ b/src/backends/reference/workloads/RefWorkloads.hpp +@@ -67,4 +67,5 @@ + #include "Softmax.hpp" + #include "Splitter.hpp" + #include "TensorBufferArrayView.hpp" ++#include "RefReverseV2Workload.hpp" + #include "RefReduceSumWorkload.hpp" +diff --git a/src/backends/reference/workloads/ReverseV2.cpp b/src/backends/reference/workloads/ReverseV2.cpp +new file mode 100644 +index 0000000..1bfd350 +--- /dev/null ++++ b/src/backends/reference/workloads/ReverseV2.cpp +@@ -0,0 +1,80 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReverseV2.hpp" ++ ++#include <armnnUtils/TensorUtils.hpp> ++ ++#include <boost/numeric/conversion/cast.hpp> ++#include <algorithm> ++#include <iostream> ++ ++namespace armnn ++{ ++ ++void ReverseV2(Decoder<float>& in, float *out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, int32_t axis) ++{ ++ IgnoreUnused(outputTensorInfo); ++ ++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis); ++ const unsigned int batchSize = inputTensorInfo.GetShape()[0]; ++ unsigned int height = inputTensorInfo.GetShape()[1]; ++ unsigned int width = inputTensorInfo.GetShape()[2]; ++ unsigned int channel = inputTensorInfo.GetShape()[3]; ++ ++ // TODO. Integrate the four if conditions below into one. 
++ if (uAxis == 3) { ++ for (unsigned int b = 0; b < batchSize; ++b) { ++ for (unsigned int y = 0; y < height; ++y) { ++ for (unsigned int x = 0; x < width; ++x) { ++ for (unsigned int c = 0; c < channel; ++c) { ++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c]; ++ float in_val = in.Get(); ++ out[(b * height * width * channel) + (y * width * channel) + (x * channel) + (channel - 1 - c)] = in_val; ++ } ++ } ++ } ++ } ++ } else if (uAxis == 2) { ++ for (unsigned int b = 0; b < batchSize; ++b) { ++ for (unsigned int y = 0; y < height; ++y) { ++ for (unsigned int c = 0; c < channel; ++c) { ++ for (unsigned int x = 0; x < width; ++x) { ++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c]; ++ float in_val = in.Get(); ++ out[(b * height * width * channel) + (y * width * channel) + ((width - 1 - x) * channel) + c] = in_val; ++ } ++ } ++ } ++ } ++ } else if (uAxis == 1) { ++ for (unsigned int b = 0; b < batchSize; ++b) { ++ for (unsigned int y = 0; y < height; ++y) { ++ for (unsigned int x = 0; x < width; ++x) { ++ for (unsigned int c = 0; c < channel; ++c) { ++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c]; ++ float in_val = in.Get(); ++ out[(b * height * width * channel) + ((height - 1 - y) * width * channel) + (x * channel) + c] = in_val; ++ } ++ } ++ } ++ } ++ } else if (uAxis == 0) { ++ for (unsigned int b = 0; b < batchSize; ++b) { ++ for (unsigned int y = 0; y < height; ++y) { ++ for (unsigned int x = 0; x < width; ++x) { ++ for (unsigned int c = 0; c < channel; ++c) { ++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c]; ++ float in_val = in.Get(); ++ out[(b * height * width * channel) + (y * width * channel) + (x * channel) + c] = in_val; ++ } ++ } ++ } ++ } ++ } ++} ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/ReverseV2.hpp b/src/backends/reference/workloads/ReverseV2.hpp +new file mode 100644 +index 0000000..3957959 +--- /dev/null ++++ b/src/backends/reference/workloads/ReverseV2.hpp +@@ -0,0 +1,20 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include "armnn/Tensor.hpp" ++#include "armnn/Descriptors.hpp" ++ ++#include "Decoders.hpp" ++ ++namespace armnn ++{ ++ ++void ReverseV2(Decoder<float>& in, float* out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, int32_t axis); ++ ++} //namespace armnn ++ +-- +2.7.4 + diff --git a/packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch b/packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch new file mode 100644 index 000000000..ed8a63923 --- /dev/null +++ b/packaging/0005-backends-test-Add-ReverseV2-op-test-cases.patch @@ -0,0 +1,282 @@ +From ba1bc1ba932624309210e838fcf418dcbf06ea62 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Tue, 8 Sep 2020 10:47:24 +0900 +Subject: [PATCH 05/10] backends/test: Add ReverseV2 op test cases + +Change-Id: I46306668501f160fef00e6e01547d285c75c7125 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + src/backends/backendsCommon/test/CMakeLists.txt | 2 + + src/backends/backendsCommon/test/LayerTests.hpp | 1 + + .../test/layerTests/ReverseV2TestImpl.cpp | 186 +++++++++++++++++++++ + .../test/layerTests/ReverseV2TestImpl.hpp | 25 +++ + src/backends/reference/test/RefLayerTests.cpp | 5 + + 5 files changed, 219 insertions(+) + create mode 100644 src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp + create mode 100644 src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp + +diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt +index 951a46d..e548b2b 100644 +--- a/src/backends/backendsCommon/test/CMakeLists.txt ++++ b/src/backends/backendsCommon/test/CMakeLists.txt +@@ -133,6 +133,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources + layerTests/ReshapeTestImpl.hpp + layerTests/ResizeTestImpl.cpp + layerTests/ResizeTestImpl.hpp ++ layerTests/ReverseV2TestImpl.cpp ++ layerTests/ReverseV2TestImpl.hpp + layerTests/RsqrtTestImpl.cpp + layerTests/RsqrtTestImpl.hpp + layerTests/SliceTestImpl.cpp +diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp +index 25f411f..b3cfe4a 100644 +--- a/src/backends/backendsCommon/test/LayerTests.hpp ++++ b/src/backends/backendsCommon/test/LayerTests.hpp +@@ -45,6 +45,7 @@ + #include <backendsCommon/test/layerTests/PreluTestImpl.hpp> + #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp> + #include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp> ++#include <backendsCommon/test/layerTests/ReverseV2TestImpl.hpp> + #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp> + #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp> + #include <backendsCommon/test/layerTests/RsqrtTestImpl.hpp> +diff --git a/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp +new file mode 100644 +index 0000000..c0134a4 +--- /dev/null ++++ b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.cpp +@@ -0,0 +1,186 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReverseV2TestImpl.hpp" ++ ++#include <backendsCommon/test/DataTypeUtils.hpp> ++#include <backendsCommon/test/TensorCopyUtils.hpp> ++#include <backendsCommon/test/WorkloadTestUtils.hpp> ++ ++#include <test/TensorHelpers.hpp> ++ ++namespace ++{ ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReverseV2TestCommon( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, ++ const armnn::TensorInfo inputTensorInfo, ++ const armnn::TensorInfo outputTensorInfo, ++ const std::vector<float>& inputData, ++ const std::vector<float>& outputData, ++ int axis = 3) ++{ ++ IgnoreUnused(memoryManager); ++ auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo)); ++ ++ LayerTestResult<float, 4> result(outputTensorInfo); ++ result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData); ++ ++ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); ++ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); ++ ++ armnn::ReverseV2QueueDescriptor descriptor; ++ unsigned int updated_idx = static_cast<uint32_t>(axis); ++ if (axis < 0) { ++ updated_idx = static_cast<uint32_t>(static_cast<int32_t>(inputTensorInfo.GetNumDimensions()) + axis); ++ } ++ ++ descriptor.m_Parameters.m_Axis = updated_idx; ++ descriptor.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW; ++ armnn::WorkloadInfo info; ++ ++ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); ++ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); ++ ++ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReverseV2(descriptor, info); ++ ++ inputHandle->Allocate(); ++ outputHandle->Allocate(); ++ ++ CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin()); ++ ++ workload->Execute(); ++ ++ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get()); ++ ++ return result; ++} ++ ++} // namespace ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReverseV2SimpleTest( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 1, 1, 5 }; ++ const armnn::TensorShape outputShape{ 1, 1, 1, 5}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f }); ++ std::vector<float> outputValues({ 9.0f, 10.0f, 8.0f, 2.0f, 5.0f }); ++ ++ return ReverseV2TestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, -1); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReverseV2MultiChannel_1Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 }; ++ const armnn::TensorShape outputShape{ 1, 3, 2, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ 
inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ std::vector<float> outputValues({ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f}); ++ ++ return ReverseV2TestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 1); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReverseV2MultiChannel_2Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 }; ++ const armnn::TensorShape outputShape{ 1, 3, 2, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ std::vector<float> outputValues({ 5.0f, 6.0f, 7.0f, 8.0f, ++ 1.0f, 2.0f, 3.0f, 4.0f, ++ ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ ++ 500.0f, 600.0f, 700.0f, 800.0f, ++ 100.0f, 200.0f, 300.0f, 400.0f }); ++ ++ return ReverseV2TestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 2); ++} ++ ++// Explicit template specializations ++ ++template LayerTestResult<float, 4> ++ReverseV2SimpleTest<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReverseV2MultiChannel_1Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReverseV2MultiChannel_2Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +diff --git a/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp +new file mode 100644 +index 0000000..4eb93cf +--- /dev/null ++++ b/src/backends/backendsCommon/test/layerTests/ReverseV2TestImpl.hpp +@@ -0,0 +1,25 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include "LayerTestResult.hpp" ++ ++#include <ResolveType.hpp> ++ ++#include <armnn/backends/IBackendInternal.hpp> ++#include <backendsCommon/WorkloadFactory.hpp> ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReverseV2SimpleTest(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReverseV2MultiChannel_1Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReverseV2MultiChannel_2Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp +index 9461e2a..a5b1f9d 100644 +--- a/src/backends/reference/test/RefLayerTests.cpp ++++ b/src/backends/reference/test/RefLayerTests.cpp +@@ -1987,4 +1987,9 @@ ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannel2Float32, ReduceSumMultiChannel_2Test< + ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannelFloat32, ReduceSumMultiBatchAndChannelTest<DataType::Float32>) + ARMNN_AUTO_TEST_CASE(ReduceSumMultiBatchAndChannel_2Float32, ReduceSumMultiBatchAndChannel_2Test<DataType::Float32>) + ++// ReverseV2 ++ARMNN_AUTO_TEST_CASE(ReverseV2Float32, ReverseV2SimpleTest<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReverseV2MultiChannelFloat32, ReverseV2MultiChannel_1Test<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReverseV2MultiChannel2Float32, ReverseV2MultiChannel_2Test<DataType::Float32>) ++ + BOOST_AUTO_TEST_SUITE_END() +-- +2.7.4 + diff --git a/packaging/0006-backends-reference-Add-ReduceMax-op-support.patch b/packaging/0006-backends-reference-Add-ReduceMax-op-support.patch new file mode 100644 index 000000000..d099562f2 --- /dev/null +++ b/packaging/0006-backends-reference-Add-ReduceMax-op-support.patch @@ -0,0 +1,892 @@ +From caa3fe740a66fe69c89bbd0fb9bd0183fb327f22 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Tue, 8 Sep 2020 11:43:19 +0900 +Subject: [PATCH 06/10] backends/reference: Add ReduceMax op support + +Change-Id: I5f8825a94aa0d24ffe5890c6c42088f7cdba4860 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + CMakeLists.txt | 2 + + include/armnn/Descriptors.hpp | 32 ++++++++++ + include/armnn/DescriptorsFwd.hpp | 1 + + include/armnn/ILayerSupport.hpp | 5 ++ + include/armnn/ILayerVisitor.hpp | 8 +++ + include/armnn/INetwork.hpp | 7 +++ + include/armnn/LayerVisitorBase.hpp | 4 ++ + src/armnn/InternalTypes.hpp | 1 + + src/armnn/LayersFwd.hpp | 2 + + src/armnn/Network.cpp | 6 ++ + src/armnn/Network.hpp | 3 + + src/armnn/layers/ReduceMaxLayer.cpp | 70 +++++++++++++++++++++ + src/armnn/layers/ReduceMaxLayer.hpp | 48 +++++++++++++++ + src/armnnTfLiteParser/TfLiteParser.cpp | 41 +++++++++++++ + src/armnnTfLiteParser/TfLiteParser.hpp | 1 + + src/backends/backendsCommon/LayerSupportBase.cpp | 8 +++ + src/backends/backendsCommon/LayerSupportBase.hpp | 5 ++ + src/backends/backendsCommon/WorkloadData.cpp | 39 ++++++++++++ + src/backends/backendsCommon/WorkloadData.hpp | 5 ++ + src/backends/backendsCommon/WorkloadFactory.cpp | 18 ++++++ + src/backends/backendsCommon/WorkloadFactory.hpp | 3 + + 
src/backends/reference/RefLayerSupport.cpp | 30 +++++++++ + src/backends/reference/RefLayerSupport.hpp | 5 ++ + src/backends/reference/RefWorkloadFactory.cpp | 6 ++ + src/backends/reference/RefWorkloadFactory.hpp | 3 + + src/backends/reference/workloads/CMakeLists.txt | 4 ++ + src/backends/reference/workloads/ReduceMax.cpp | 71 ++++++++++++++++++++++ + src/backends/reference/workloads/ReduceMax.hpp | 20 ++++++ + .../reference/workloads/RefReduceMaxWorkload.cpp | 35 +++++++++++ + .../reference/workloads/RefReduceMaxWorkload.hpp | 21 +++++++ + src/backends/reference/workloads/RefWorkloads.hpp | 1 + + 31 files changed, 505 insertions(+) + create mode 100644 src/armnn/layers/ReduceMaxLayer.cpp + create mode 100644 src/armnn/layers/ReduceMaxLayer.hpp + create mode 100644 src/backends/reference/workloads/ReduceMax.cpp + create mode 100644 src/backends/reference/workloads/ReduceMax.hpp + create mode 100644 src/backends/reference/workloads/RefReduceMaxWorkload.cpp + create mode 100644 src/backends/reference/workloads/RefReduceMaxWorkload.hpp + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 52c8785..631c76f 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -382,6 +382,8 @@ list(APPEND armnn_sources + src/armnn/layers/TransposeLayer.cpp + src/armnn/layers/ReverseV2Layer.hpp + src/armnn/layers/ReverseV2Layer.cpp ++ src/armnn/layers/ReduceMaxLayer.hpp ++ src/armnn/layers/ReduceMaxLayer.cpp + src/armnn/layers/ReduceSumLayer.hpp + src/armnn/layers/ReduceSumLayer.cpp + src/armnn/BackendRegistry.cpp +diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp +index c7123f1..b7b4f53 100644 +--- a/include/armnn/Descriptors.hpp ++++ b/include/armnn/Descriptors.hpp +@@ -1243,6 +1243,38 @@ struct ReverseV2Descriptor + DataLayout m_DataLayout; + }; + ++/// A ReduceMaxDescriptor for the REDUCE MAX. ++struct ReduceMaxDescriptor ++{ ++ ReduceMaxDescriptor() ++ : m_TargetWidth(0) ++ , m_TargetHeight(0) ++ , m_Axis(0) ++ , m_Keepdims(0) ++ , m_DataLayout(DataLayout::NCHW) ++ {} ++ ++ bool operator ==(const ReduceMaxDescriptor& rhs) const ++ { ++ return m_TargetWidth == rhs.m_TargetWidth && ++ m_TargetHeight == rhs.m_TargetHeight && ++ m_Axis == rhs.m_Axis && ++ m_Keepdims == rhs.m_Keepdims && ++ m_DataLayout == rhs.m_DataLayout; ++ } ++ ++ /// Target width value. ++ uint32_t m_TargetWidth; ++ /// Target height value. ++ uint32_t m_TargetHeight; ++ /// The indices of the dimensions to reduce. ++ int32_t m_Axis; ++ /// If true, retains reduced dimensions with length 1. ++ uint32_t m_Keepdims; ++ /// The data layout to be used (NCHW, NHWC). ++ DataLayout m_DataLayout; ++}; ++ + /// A ReduceSumDescriptor for the REDUCE SUM. 
+ struct ReduceSumDescriptor + { +diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp +index 38e74cd..3b736f9 100644 +--- a/include/armnn/DescriptorsFwd.hpp ++++ b/include/armnn/DescriptorsFwd.hpp +@@ -34,6 +34,7 @@ struct ReshapeDescriptor; + struct ResizeBilinearDescriptor; + struct ResizeDescriptor; + struct ReverseV2Descriptor; ++struct ReduceMaxDescriptor; + struct ReduceSumDescriptor; + struct SoftmaxDescriptor; + struct SpaceToBatchNdDescriptor; +diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp +index 670c856..ad33727 100644 +--- a/include/armnn/ILayerSupport.hpp ++++ b/include/armnn/ILayerSupport.hpp +@@ -397,6 +397,11 @@ public: + const ReverseV2Descriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0; + ++ virtual bool IsReduceMaxSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceMaxDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0; ++ + virtual bool IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp +index a40dbae..eb09f22 100644 +--- a/include/armnn/ILayerVisitor.hpp ++++ b/include/armnn/ILayerVisitor.hpp +@@ -411,6 +411,14 @@ public: + const ReverseV2Descriptor& reversev2Descriptor, + const char* name = nullptr) = 0; + ++ /// Function that a reduce_max layer should call back to when its Accept(ILayerVisitor&) function is invoked. ++ /// @param layer - pointer to the layer which is calling back to this visit function. ++ /// @param ReduceMaxDescriptor - Parameters for the reduce max operation. ++ /// @param name - Optional name for the layer. ++ virtual void VisitReduceMaxLayer(const IConnectableLayer* layer, ++ const ReduceMaxDescriptor& reducemaxDescriptor, ++ const char* name = nullptr) = 0; ++ + /// Function that a reduce_sum layer should call back to when its Accept(ILayerVisitor&) function is invoked. + /// @param layer - pointer to the layer which is calling back to this visit function. + /// @param ReduceSumDescriptor - Parameters for the reduce max operation. +diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp +index 6678a1c..b0a3e04 100644 +--- a/include/armnn/INetwork.hpp ++++ b/include/armnn/INetwork.hpp +@@ -368,6 +368,13 @@ public: + const char* name = nullptr) = 0; + + /// Adds a reducemax layer to the network. ++ /// @param ReduceMaxDescriptor - Parameters for the reducemax operation. ++ /// @param name - Optional name for the layer. ++ /// @return - Interface for configuring the layer. ++ virtual IConnectableLayer* AddReduceMaxLayer(const ReduceMaxDescriptor& reducemaxDescriptor, ++ const char* name = nullptr) = 0; ++ ++ /// Adds a reducemax layer to the network. + /// @param ReduceSumDescriptor - Parameters for the reducemax operation. + /// @param name - Optional name for the layer. + /// @return - Interface for configuring the layer. 
+diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp +index 80d4dfb..4de379a 100644 +--- a/include/armnn/LayerVisitorBase.hpp ++++ b/include/armnn/LayerVisitorBase.hpp +@@ -208,6 +208,10 @@ public: + const ReverseV2Descriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } + ++ void VisitReduceMaxLayer(const IConnectableLayer*, ++ const ReduceMaxDescriptor&, ++ const char*) override { DefaultPolicy::Apply(__func__); } ++ + void VisitReduceSumLayer(const IConnectableLayer*, + const ReduceSumDescriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } +diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp +index e523d52..5c435de 100644 +--- a/src/armnn/InternalTypes.hpp ++++ b/src/armnn/InternalTypes.hpp +@@ -73,6 +73,7 @@ + X(Transpose) \ + X(TransposeConvolution2d) \ + X(ReverseV2) \ ++ X(ReduceMax) \ + X(ReduceSum) + + /// When adding a new layer, adapt also the LastLayer enum value in the +diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp +index 7ac517c..69c133f 100644 +--- a/src/armnn/LayersFwd.hpp ++++ b/src/armnn/LayersFwd.hpp +@@ -55,6 +55,7 @@ + #include "layers/ReshapeLayer.hpp" + #include "layers/ResizeLayer.hpp" + #include "layers/ReverseV2Layer.hpp" ++#include "layers/ReduceMaxLayer.hpp" + #include "layers/ReduceSumLayer.hpp" + #include "layers/SliceLayer.hpp" + #include "layers/SoftmaxLayer.hpp" +@@ -145,6 +146,7 @@ DECLARE_LAYER(QuantizedLstm) + DECLARE_LAYER(Reshape) + DECLARE_LAYER(Resize) + DECLARE_LAYER(ReverseV2) ++DECLARE_LAYER(ReduceMax) + DECLARE_LAYER(ReduceSum) + DECLARE_LAYER(Slice) + DECLARE_LAYER(Softmax) +diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp +index bc6738e..6f916f2 100644 +--- a/src/armnn/Network.cpp ++++ b/src/armnn/Network.cpp +@@ -1478,6 +1478,12 @@ IConnectableLayer* Network::AddReverseV2Layer(const ReverseV2Descriptor& reverse + return m_Graph->AddLayer<ReverseV2Layer>(reversev2Descriptor, name); + } + ++IConnectableLayer* Network::AddReduceMaxLayer(const ReduceMaxDescriptor& reducemaxDescriptor, ++ const char* name) ++{ ++ return m_Graph->AddLayer<ReduceMaxLayer>(reducemaxDescriptor, name); ++} ++ + IConnectableLayer* Network::AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor, + const char* name) + { +diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp +index 95d235e..18383eb 100644 +--- a/src/armnn/Network.hpp ++++ b/src/armnn/Network.hpp +@@ -163,6 +163,9 @@ public: + IConnectableLayer* AddReverseV2Layer(const ReverseV2Descriptor& reversev2Descriptor, + const char* name = nullptr) override; + ++ IConnectableLayer* AddReduceMaxLayer(const ReduceMaxDescriptor& reducemaxDescriptor, ++ const char* name = nullptr) override; ++ + IConnectableLayer* AddReduceSumLayer(const ReduceSumDescriptor& reducesumDescriptor, + const char* name = nullptr) override; + +diff --git a/src/armnn/layers/ReduceMaxLayer.cpp b/src/armnn/layers/ReduceMaxLayer.cpp +new file mode 100644 +index 0000000..21b08e4 +--- /dev/null ++++ b/src/armnn/layers/ReduceMaxLayer.cpp +@@ -0,0 +1,70 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReduceMaxLayer.hpp" ++#include "LayerCloneBase.hpp" ++ ++#include <armnn/TypesUtils.hpp> ++ ++#include <armnnUtils/DataLayoutIndexed.hpp> ++ ++#include <backendsCommon/WorkloadData.hpp> ++#include <backendsCommon/WorkloadFactory.hpp> ++ ++using namespace armnnUtils; ++ ++namespace armnn ++{ ++ ++ReduceMaxLayer::ReduceMaxLayer(const ReduceMaxDescriptor& param, const char* name) ++ : LayerWithParameters(1, 1, LayerType::ReduceMax, param, name) ++{ ++} ++ ++std::unique_ptr<IWorkload> ReduceMaxLayer::CreateWorkload(const IWorkloadFactory& factory) const ++{ ++ ReduceMaxQueueDescriptor descriptor; ++ return factory.CreateReduceMax(descriptor, PrepInfoAndDesc(descriptor)); ++} ++ ++ReduceMaxLayer* ReduceMaxLayer::Clone(Graph& graph) const ++{ ++ return CloneBase<ReduceMaxLayer>(graph, m_Param, GetName()); ++} ++ ++std::vector<TensorShape> ReduceMaxLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const ++{ ++ ARMNN_ASSERT(inputShapes.size() == 1); ++ ++ const TensorShape& inputShape = inputShapes[0]; ++ const DataLayoutIndexed dimensionIndices = m_Param.m_DataLayout; ++ ++ unsigned int outWidth = m_Param.m_TargetWidth; ++ unsigned int outHeight = m_Param.m_TargetHeight; ++ unsigned int outChannels = inputShape[dimensionIndices.GetChannelsIndex()]; ++ unsigned int outBatch = inputShape[0]; ++ ++ TensorShape tensorShape = m_Param.m_DataLayout == armnn::DataLayout::NHWC ? ++ TensorShape( { outBatch, outHeight, outWidth, outChannels } ) : ++ TensorShape( { outBatch, outChannels, outHeight, outWidth }); ++ ++ return std::vector<TensorShape>({ tensorShape }); ++} ++ ++void ReduceMaxLayer::ValidateTensorShapesFromInputs() ++{ ++ VerifyLayerConnections(1, CHECK_LOCATION()); ++ ++ auto inferredShapes = InferOutputShapes({ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape() }); ++ ++ ARMNN_ASSERT(inferredShapes.size() == 1); ++} ++ ++void ReduceMaxLayer::Accept(ILayerVisitor& visitor) const ++{ ++ visitor.VisitReduceMaxLayer(this, GetParameters(), GetName()); ++} ++ ++} // namespace armnn +diff --git a/src/armnn/layers/ReduceMaxLayer.hpp b/src/armnn/layers/ReduceMaxLayer.hpp +new file mode 100644 +index 0000000..2f8e01c +--- /dev/null ++++ b/src/armnn/layers/ReduceMaxLayer.hpp +@@ -0,0 +1,48 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++#pragma once ++ ++#include "LayerWithParameters.hpp" ++ ++namespace armnn ++{ ++ ++/// This layer represents a reducemax operation. ++class ReduceMaxLayer : public LayerWithParameters<ReduceMaxDescriptor> ++{ ++public: ++ /// Makes a workload for the ReduceMax type. ++ /// @param [in] graph The graph where this layer can be found. ++ /// @param [in] factory The workload factory which will create the workload. ++ /// @return A pointer to the created workload, or nullptr if not created. ++ virtual std::unique_ptr<IWorkload>CreateWorkload(const IWorkloadFactory& factory) const override; ++ ++ /// Creates a dynamically-allocated copy of this layer. ++ /// @param [in] graph The graph into which this layer is being cloned. ++ ReduceMaxLayer* Clone(Graph& graph) const override; ++ ++ /// Check if the input tensor shape(s) ++ /// will lead to a valid configuration of @ref ReduceMaxLayer. ++ void ValidateTensorShapesFromInputs() override; ++ ++ /// By default returns inputShapes if the number of inputs are equal to number of outputs, ++ /// otherwise infers the output shapes from given input shapes and layer properties. 
++ /// @param [in] inputShapes The input shapes layer has. ++ /// @return A vector to the inferred output shape. ++ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override; ++ ++ void Accept(ILayerVisitor& visitor) const override; ++ ++protected: ++ /// Constructor to create a ReduceMaxLayer. ++ /// @param [in] param ReduceMaxDescriptor to configure the resize operation. ++ /// @param [in] name Optional name for the layer. ++ ReduceMaxLayer(const ReduceMaxDescriptor& param, const char* name); ++ ++ /// Default destructor ++ ~ReduceMaxLayer() = default; ++}; ++ ++} // namespace armnn +diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp +index 3da7288..05a15e5 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.cpp ++++ b/src/armnnTfLiteParser/TfLiteParser.cpp +@@ -533,6 +533,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o + m_ParserFunctions[tflite::BuiltinOperator_UNPACK] = &TfLiteParser::ParseUnpack; + m_ParserFunctions[tflite::BuiltinOperator_REVERSE_V2] = &TfLiteParser::ParseReverse_v2; + m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax; ++ m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MAX] = &TfLiteParser::ParseReduceMax; + m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum; + + // register supported custom operators +@@ -2814,6 +2815,46 @@ void TfLiteParser::ParseArgMax(size_t subgraphIndex, size_t operatorIndex) + RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); + } + ++void TfLiteParser::ParseReduceMax(size_t subgraphIndex, size_t operatorIndex) ++{ ++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(inputs.size(), 2); ++ ++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(outputs.size(), 1); ++ ++ auto layerName = boost::str(boost::format("ReduceMax:%1%:%2%") % subgraphIndex % operatorIndex); ++ ++ armnn::TensorInfo sizeTensorInfo0 = ToTensorInfo(inputs[0]); ++ armnn::TensorInfo sizeTensorInfo1 = ToTensorInfo(inputs[1]); ++ armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]); ++ ++ TensorShape shape = sizeTensorInfo0.GetShape(); ++ ++ // Get const axis value from model and set it to descriptor. ++ BufferRawPtr axisBufferPtr = GetBuffer(m_Model, inputs[1]->buffer); ++ ++ ReduceMaxDescriptor desc; ++ desc.m_Axis = axisBufferPtr->data.data()[4]; ++ desc.m_TargetHeight = shape[1]; ++ desc.m_TargetWidth = shape[2]; ++ desc.m_DataLayout = armnn::DataLayout::NHWC; ++ ++ // Register a new layer object, ReduceMax, to in-memory network of ARMNN. ++ IConnectableLayer *layer = m_Network->AddReduceMaxLayer(desc, layerName.c_str()); ++ ++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); ++ ++ // Register input tensor to the layer. ++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0]}); ++ ++ // Register output tensor to the layer. 
++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); ++} ++ + void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex) + { + const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex]; +diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp +index 7970559..da635ae 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.hpp ++++ b/src/armnnTfLiteParser/TfLiteParser.hpp +@@ -135,6 +135,7 @@ private: + void ParseUnpack(size_t subgraphIndex, size_t operatorIndex); + void ParseReverse_v2(size_t subgraphIndex, size_t operatorIndex); + void ParseArgMax(size_t subgraphIndex, size_t operatorIndex); ++ void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex); + void ParseSum(size_t subgraphIndex, size_t operatorIndex); + + void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot); +diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp +index 0c1c1e2..9fb6737 100644 +--- a/src/backends/backendsCommon/LayerSupportBase.cpp ++++ b/src/backends/backendsCommon/LayerSupportBase.cpp +@@ -623,6 +623,14 @@ bool LayerSupportBase::IsReverseV2Supported(const TensorInfo& /*input*/, + return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); + } + ++bool LayerSupportBase::IsReduceMaxSupported(const TensorInfo& /*input*/, ++ const TensorInfo& /*output*/, ++ const ReduceMaxDescriptor& /*descriptor*/, ++ Optional<std::string&> reasonIfUnsupported) const ++{ ++ return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); ++} ++ + bool LayerSupportBase::IsReduceSumSupported(const TensorInfo& /*input*/, + const TensorInfo& /*output*/, + const ReduceSumDescriptor& /*descriptor*/, +diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp +index 6f1e6e0..1c6da1a 100644 +--- a/src/backends/backendsCommon/LayerSupportBase.hpp ++++ b/src/backends/backendsCommon/LayerSupportBase.hpp +@@ -382,6 +382,11 @@ public: + const ReverseV2Descriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + ++ bool IsReduceMaxSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceMaxDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; ++ + bool IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp +index dcbec11..afdfcad 100644 +--- a/src/backends/backendsCommon/WorkloadData.cpp ++++ b/src/backends/backendsCommon/WorkloadData.cpp +@@ -3532,6 +3532,45 @@ void ReverseV2QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const + } + } + ++void ReduceMaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const ++{ ++ const std::string descriptorName{"ReduceMaxQueueDescriptor"}; ++ ++ ValidateNumInputs(workloadInfo, descriptorName, 1); ++ ValidateNumOutputs(workloadInfo, descriptorName, 1); ++ ++ const TensorInfo& inputTensorInfo = workloadInfo.m_InputTensorInfos[0]; ++ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; ++ ++ ValidateTensorNumDimensions(inputTensorInfo, descriptorName, 4, "input"); ++ 
ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output"); ++ ++ std::vector<DataType> supportedTypes = ++ { ++ DataType::BFloat16, ++ DataType::Float16, ++ DataType::Float32, ++ DataType::QAsymmS8, ++ DataType::QAsymmU8, ++ DataType::QSymmS16, ++ DataType::Signed32 ++ }; ++ ++ ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); ++ ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); ++ ++ // ReduceMax only changes width and height: batch and channel count must match. ++ const unsigned int inputBatchSize = inputTensorInfo.GetShape()[0]; ++ const unsigned int outputBatchSize = outputTensorInfo.GetShape()[0]; ++ if (inputBatchSize != outputBatchSize) ++ { ++ throw InvalidArgumentException( ++ boost::str(boost::format("%1%: Input batch size (%2%) " ++ "does not match output batch size (%3%)") % ++ descriptorName % inputBatchSize % outputBatchSize)); ++ } ++} ++ + void ReduceSumQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const + { + const std::string descriptorName{"ReduceSumQueueDescriptor"}; +diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp +index 0cbe8aa..1db7004 100644 +--- a/src/backends/backendsCommon/WorkloadData.hpp ++++ b/src/backends/backendsCommon/WorkloadData.hpp +@@ -639,6 +639,11 @@ struct ReverseV2QueueDescriptor : QueueDescriptorWithParameters<ReverseV2Descrip + void Validate(const WorkloadInfo& workloadInfo) const; + }; + ++struct ReduceMaxQueueDescriptor : QueueDescriptorWithParameters<ReduceMaxDescriptor> ++{ ++ void Validate(const WorkloadInfo& workloadInfo) const; ++}; ++ + struct ReduceSumQueueDescriptor : QueueDescriptorWithParameters<ReduceSumDescriptor> + { + void Validate(const WorkloadInfo& workloadInfo) const; +diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp +index 31e6bfd..639e95c 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.cpp ++++ b/src/backends/backendsCommon/WorkloadFactory.cpp +@@ -1190,6 +1190,18 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, + reason); + break; + } ++ case LayerType::ReduceMax: ++ { ++ auto cLayer = PolymorphicDowncast<const ReduceMaxLayer*>(&layer); ++ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); ++ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); ++ ++ result = layerSupportObject->IsReduceMaxSupported(OverrideDataType(input, dataType), ++ OverrideDataType(output, dataType), ++ cLayer->GetParameters(), ++ reason); ++ break; ++ } + case LayerType::ReduceSum: + { + auto cLayer = PolymorphicDowncast<const ReduceSumLayer*>(&layer); +@@ -1614,6 +1626,12 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateReverseV2(const ReverseV2Queu + return std::unique_ptr<IWorkload>(); + } + ++std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceMax(const ReduceMaxQueueDescriptor& /*descriptor*/, ++ const WorkloadInfo& /*info*/) const ++{ ++ return std::unique_ptr<IWorkload>(); ++} ++ + std::unique_ptr<IWorkload> IWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& /*descriptor*/, + const WorkloadInfo& /*info*/) const + { +diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp +index 740da18..b068b45 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.hpp ++++ b/src/backends/backendsCommon/WorkloadFactory.hpp +@@ -254,6 +254,9 @@ public: + virtual std::unique_ptr<IWorkload> 
CreateReverseV2(const ReverseV2QueueDescriptor& descriptor, + const WorkloadInfo& info) const; + ++ virtual std::unique_ptr<IWorkload> CreateReduceMax(const ReduceMaxQueueDescriptor& descriptor, ++ const WorkloadInfo& info) const; ++ + virtual std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, + const WorkloadInfo& info) const; + }; +diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp +index 1ac947e..91bb954 100644 +--- a/src/backends/reference/RefLayerSupport.cpp ++++ b/src/backends/reference/RefLayerSupport.cpp +@@ -2161,6 +2161,36 @@ bool RefLayerSupport::IsReverseV2Supported(const TensorInfo& input, + return supported; + } + ++bool RefLayerSupport::IsReduceMaxSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceMaxDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported) const ++{ ++ IgnoreUnused(descriptor); ++ bool supported = true; ++ std::array<DataType,7> supportedTypes = ++ { ++ DataType::BFloat16, ++ DataType::Float32, ++ DataType::Float16, ++ DataType::QAsymmS8, ++ DataType::QAsymmU8, ++ DataType::QSymmS16, ++ DataType::Signed32 ++ }; ++ ++ supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported, ++ "Reference ReduceMax: input type not supported"); ++ ++ supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported, ++ "Reference ReduceMax: output type not supported"); ++ ++ supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported, ++ "Reference ReduceMax: input and output types not matching"); ++ ++ return supported; ++} ++ + bool RefLayerSupport::IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp +index cdc2adb..c095f7c 100644 +--- a/src/backends/reference/RefLayerSupport.hpp ++++ b/src/backends/reference/RefLayerSupport.hpp +@@ -347,6 +347,11 @@ public: + const ReverseV2Descriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + ++ bool IsReduceMaxSupported(const TensorInfo& input, ++ const TensorInfo& output, ++ const ReduceMaxDescriptor& descriptor, ++ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; ++ + bool IsReduceSumSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceSumDescriptor& descriptor, +diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp +index 7d1e810..c74516e 100644 +--- a/src/backends/reference/RefWorkloadFactory.cpp ++++ b/src/backends/reference/RefWorkloadFactory.cpp +@@ -638,6 +638,12 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReverseV2(const ReverseV2Qu + return std::make_unique<RefReverseV2Workload>(descriptor, info); + } + ++std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceMax(const ReduceMaxQueueDescriptor& descriptor, ++ const WorkloadInfo& info) const ++{ ++ return std::make_unique<RefReduceMaxWorkload>(descriptor, info); ++} ++ + std::unique_ptr<IWorkload> RefWorkloadFactory::CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, + const WorkloadInfo& info) const + { +diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp +index 14df6b8..289d996 100644 +--- a/src/backends/reference/RefWorkloadFactory.hpp ++++ b/src/backends/reference/RefWorkloadFactory.hpp +@@ 
-253,6 +253,9 @@ public: + std::unique_ptr<IWorkload> CreateReverseV2(const ReverseV2QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + ++ std::unique_ptr<IWorkload> CreateReduceMax(const ReduceMaxQueueDescriptor& descriptor, ++ const WorkloadInfo& info) const override; ++ + std::unique_ptr<IWorkload> CreateReduceSum(const ReduceSumQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + +diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt +index f68a673..7eaa615 100644 +--- a/src/backends/reference/workloads/CMakeLists.txt ++++ b/src/backends/reference/workloads/CMakeLists.txt +@@ -49,6 +49,8 @@ list(APPEND armnnRefBackendWorkloads_sources + Minimum.hpp + Pad.cpp + Pad.hpp ++ ReduceMax.cpp ++ ReduceMax.hpp + ReduceSum.cpp + ReduceSum.hpp + ReverseV2.cpp +@@ -178,6 +180,8 @@ list(APPEND armnnRefBackendWorkloads_sources + TransposeConvolution2d.hpp + RefReverseV2Workload.cpp + RefReverseV2Workload.hpp ++ RefReduceMaxWorkload.cpp ++ RefReduceMaxWorkload.hpp + RefReduceSumWorkload.cpp + RefReduceSumWorkload.hpp + ) +diff --git a/src/backends/reference/workloads/ReduceMax.cpp b/src/backends/reference/workloads/ReduceMax.cpp +new file mode 100644 +index 0000000..d956201 +--- /dev/null ++++ b/src/backends/reference/workloads/ReduceMax.cpp +@@ -0,0 +1,71 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReduceMax.hpp" ++ ++#include <armnnUtils/TensorUtils.hpp> ++ ++#include <boost/numeric/conversion/cast.hpp> ++ ++namespace armnn ++{ ++ ++void ReduceMax(Decoder<float>& in, float *out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, int32_t axis) ++{ ++ IgnoreUnused(outputTensorInfo); ++ ++ unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis); ++ const unsigned int batchSize = inputTensorInfo.GetShape()[0]; ++ const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis); ++ const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis]; ++ const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), ++ uAxis + 1, ++ inputTensorInfo.GetNumDimensions()); ++ ++ if (batchSize > 0 && uAxis == 2 && inputTensorInfo.GetShape().GetNumDimensions() == 4) { ++ unsigned int height = inputTensorInfo.GetShape()[1]; ++ unsigned int width = inputTensorInfo.GetShape()[2]; ++ unsigned int channel = inputTensorInfo.GetShape()[3]; ++ ++ for (unsigned int b = 0; b < batchSize; ++b) { ++ for (unsigned int c = 0; c < channel; ++c) { ++ auto tmpValue = in.Get(); ++ for (unsigned int y = 0; y < height; ++y) { ++ for (unsigned int x = 0; x < width; ++x) { ++ in[(b * height * width * channel) + (y * width * channel) + (x * channel) + c]; ++ const auto& value = in.Get(); ++ ++ if (value >= tmpValue) ++ tmpValue = value; ++ } ++ } ++ ++ out[b * channel + c] = tmpValue; ++ tmpValue = 0.0f; ++ } ++ } ++ ++ return; ++ } ++ ++ ++ for (unsigned int outer = 0; outer < outerElements; ++outer) { ++ for (unsigned int inner = 0; inner < innerElements; ++inner) { ++ in[outer * axisSize * innerElements + inner]; ++ auto tmpValue = in.Get(); ++ for (unsigned int i = 1; i < axisSize; ++i) { ++ in[(outer * axisSize * innerElements) + (i * innerElements) + inner]; ++ const auto& value = in.Get(); ++ if (value >= tmpValue) { ++ tmpValue = value; ++ } ++ } ++ out[outer * innerElements + inner] = tmpValue; ++ } ++ } ++} ++ ++} //namespace 
armnn +diff --git a/src/backends/reference/workloads/ReduceMax.hpp b/src/backends/reference/workloads/ReduceMax.hpp +new file mode 100644 +index 0000000..3d9877a +--- /dev/null ++++ b/src/backends/reference/workloads/ReduceMax.hpp +@@ -0,0 +1,20 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include "armnn/Tensor.hpp" ++#include "armnn/Descriptors.hpp" ++ ++#include "Decoders.hpp" ++ ++namespace armnn ++{ ++ ++void ReduceMax(Decoder<float>& in, float* out, const TensorInfo& inputTensorInfo, ++ const TensorInfo& outputTensorInfo, int32_t axis); ++ ++} //namespace armnn ++ +diff --git a/src/backends/reference/workloads/RefReduceMaxWorkload.cpp b/src/backends/reference/workloads/RefReduceMaxWorkload.cpp +new file mode 100644 +index 0000000..82eb280 +--- /dev/null ++++ b/src/backends/reference/workloads/RefReduceMaxWorkload.cpp +@@ -0,0 +1,35 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "RefReduceMaxWorkload.hpp" ++ ++#include "ReduceMax.hpp" ++#include "RefWorkloadUtils.hpp" ++#include "BaseIterator.hpp" ++#include "Profiling.hpp" ++ ++#include "BaseIterator.hpp" ++#include "Decoders.hpp" ++#include "Encoders.hpp" ++ ++namespace armnn ++{ ++ ++void RefReduceMaxWorkload::Execute() const ++{ ++ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReduceMaxWorkload_Execute"); ++ ++ const TensorInfo& inputTensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); ++ const TensorInfo& outputTensorInfo = GetTensorInfo(m_Data.m_Outputs[0]); ++ ++ std::unique_ptr<Decoder<float>> decoderPtr = MakeDecoder<float>(inputTensorInfo, m_Data.m_Inputs[0]->Map()); ++ Decoder<float> &decoder = *decoderPtr; ++ ++ float *output = GetOutputTensorData<float>(0, m_Data); ++ ++ ReduceMax(decoder, output, inputTensorInfo, outputTensorInfo, m_Data.m_Parameters.m_Axis); ++} ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/RefReduceMaxWorkload.hpp b/src/backends/reference/workloads/RefReduceMaxWorkload.hpp +new file mode 100644 +index 0000000..df9cb1e +--- /dev/null ++++ b/src/backends/reference/workloads/RefReduceMaxWorkload.hpp +@@ -0,0 +1,21 @@ ++// ++// Copyright © 2017 Arm Ltd. All rights reserved. 
++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include <backendsCommon/Workload.hpp> ++#include <backendsCommon/WorkloadData.hpp> ++ ++namespace armnn ++{ ++ ++class RefReduceMaxWorkload : public BaseWorkload<ReduceMaxQueueDescriptor> ++{ ++public: ++ using BaseWorkload<ReduceMaxQueueDescriptor>::BaseWorkload; ++ virtual void Execute() const override; ++}; ++ ++} //namespace armnn +diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp +index 9427d5c..140dca8 100644 +--- a/src/backends/reference/workloads/RefWorkloads.hpp ++++ b/src/backends/reference/workloads/RefWorkloads.hpp +@@ -68,4 +68,5 @@ + #include "Splitter.hpp" + #include "TensorBufferArrayView.hpp" + #include "RefReverseV2Workload.hpp" ++#include "RefReduceMaxWorkload.hpp" + #include "RefReduceSumWorkload.hpp" +-- +2.7.4 + diff --git a/packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch b/packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch new file mode 100644 index 000000000..6bd599e3c --- /dev/null +++ b/packaging/0007-backends-test-Add-ReduceMax-op-test-cases.patch @@ -0,0 +1,333 @@ +From bf35ac04008cf78641b510c21219bfd7163dfeb8 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Tue, 8 Sep 2020 11:56:48 +0900 +Subject: [PATCH 07/10] backends/test: Add ReduceMax op test cases + +Change-Id: Iebe168dc646981f8a9ab62efc2c6c14aed8d9f84 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + src/backends/backendsCommon/test/CMakeLists.txt | 2 + + src/backends/backendsCommon/test/LayerTests.hpp | 1 + + .../test/layerTests/ReduceMaxTestImpl.cpp | 230 +++++++++++++++++++++ + .../test/layerTests/ReduceMaxTestImpl.hpp | 29 +++ + src/backends/reference/test/RefLayerTests.cpp | 6 + + 5 files changed, 268 insertions(+) + create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp + create mode 100644 src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp + +diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt +index e548b2b..9b8ba74 100644 +--- a/src/backends/backendsCommon/test/CMakeLists.txt ++++ b/src/backends/backendsCommon/test/CMakeLists.txt +@@ -127,6 +127,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources + layerTests/PreluTestImpl.hpp + layerTests/QuantizeTestImpl.cpp + layerTests/QuantizeTestImpl.hpp ++ layerTests/ReduceMaxTestImpl.cpp ++ layerTests/ReduceMaxTestImpl.hpp + layerTests/ReduceSumTestImpl.cpp + layerTests/ReduceSumTestImpl.hpp + layerTests/ReshapeTestImpl.cpp +diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp +index b3cfe4a..b86e8d7 100644 +--- a/src/backends/backendsCommon/test/LayerTests.hpp ++++ b/src/backends/backendsCommon/test/LayerTests.hpp +@@ -44,6 +44,7 @@ + #include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp> + #include <backendsCommon/test/layerTests/PreluTestImpl.hpp> + #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp> ++#include <backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp> + #include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp> + #include <backendsCommon/test/layerTests/ReverseV2TestImpl.hpp> + #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp> +diff --git a/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp +new file mode 100644 +index 0000000..81cebb6 +--- /dev/null ++++ 
b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.cpp +@@ -0,0 +1,230 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#include "ReduceMaxTestImpl.hpp" ++ ++#include <backendsCommon/test/DataTypeUtils.hpp> ++#include <backendsCommon/test/TensorCopyUtils.hpp> ++#include <backendsCommon/test/WorkloadTestUtils.hpp> ++ ++#include <test/TensorHelpers.hpp> ++ ++namespace ++{ ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceMaxTestCommon( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, ++ const armnn::TensorInfo inputTensorInfo, ++ const armnn::TensorInfo outputTensorInfo, ++ const std::vector<float>& inputData, ++ const std::vector<float>& outputData, ++ int axis = 3) ++{ ++ IgnoreUnused(memoryManager); ++ auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo)); ++ ++ LayerTestResult<float, 4> result(outputTensorInfo); ++ result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData); ++ ++ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); ++ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); ++ ++ armnn::ReduceMaxQueueDescriptor descriptor; ++ unsigned int updated_idx = static_cast<uint32_t>(axis); ++ if (axis < 0) { ++ updated_idx = static_cast<uint32_t>(static_cast<int32_t>(inputTensorInfo.GetNumDimensions()) + axis); ++ } ++ ++ descriptor.m_Parameters.m_Axis = updated_idx; ++ descriptor.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW; ++ armnn::WorkloadInfo info; ++ ++ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); ++ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); ++ ++ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduceMax(descriptor, info); ++ ++ inputHandle->Allocate(); ++ outputHandle->Allocate(); ++ ++ CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin()); ++ ++ workload->Execute(); ++ ++ CopyDataFromITensorHandle(result.output.origin(), outputHandle.get()); ++ ++ return result; ++} ++ ++} // namespace ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceMaxSimpleTest( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 1, 1, 5 }; ++ const armnn::TensorShape outputShape{ 1, 1, 1, 1}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f }); ++ std::vector<float> outputValues({ 10.0f }); ++ ++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, -1); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceMaxMultiChannel_1Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 }; ++ const armnn::TensorShape outputShape{ 1, 1, 2, 4}; ++ 
++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ std::vector<float> outputValues({ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ ++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 1); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceMaxMultiChannel_2Test( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 1, 3, 2, 4 }; ++ const armnn::TensorShape outputShape{ 1, 1, 1, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues({ 1.0f, 2.0f, 3.0f, 4.0f, ++ 5.0f, 6.0f, 7.0f, 8.0f, ++ ++ 10.0f, 20.0f, 30.0f, 40.0f, ++ 50.0f, 60.0f, 70.0f, 80.0f, ++ ++ 100.0f, 200.0f, 300.0f, 400.0f, ++ 500.0f, 600.0f, 700.0f, 800.0f }); ++ std::vector<float> outputValues({ 500.0f, 600.0f, 700.0f, 800.0f }); ++ ++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 2); ++} ++ ++template<armnn::DataType ArmnnType, typename T> ++LayerTestResult<float, 4> ReduceMaxMultiBatchAndChannelTest( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) ++{ ++ const armnn::TensorShape inputShape{ 3, 2, 3, 4 }; ++ const armnn::TensorShape outputShape{ 3, 1, 1, 4}; ++ ++ armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); ++ ++ if (armnn::IsQuantizedType<T>()) ++ { ++ inputTensorInfo.SetQuantizationScale(1.0f); ++ inputTensorInfo.SetQuantizationOffset(0); ++ } ++ ++ armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); ++ ++ std::vector<float> inputValues( {7, 8, 6, 1, ++ 1, 1, 8, 7, ++ 3, 7, 7, 7, ++ ++ 6, 8, 4, 7, ++ 3, 8, 7, 3, ++ 5, 8, 8, 8, ++ ++ ++ 7, 8, 2, 7, ++ 3, 8, 5, 6, ++ 8, 4, 2, 7, ++ ++ 1, 6, 7, 2, ++ 8, 3, 3, 1, ++ 7, 6, 2, 6, ++ ++ ++ 5, 3, 4, 8, ++ 7, 8, 2, 4, ++ 6, 6, 2, 8, ++ ++ 2, 2, 7, 2, ++ 5, 3, 6, 3, ++ 6, 1, 8, 8}); ++ std::vector<float> outputValues({ 7.0f, 8.0f, 8.0f, 8.0f, ++ 8.0f, 8.0f, 7.0f, 7.0f, ++ 7.0f, 8.0f, 8.0f, 8.0f}); ++ ++ return ReduceMaxTestCommon<ArmnnType>(workloadFactory, memoryManager, ++ inputTensorInfo, outputTensorInfo, ++ inputValues, outputValues, 2); ++} ++ ++// Explicit template specializations ++ ++template LayerTestResult<float, 4> ++ReduceMaxSimpleTest<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReduceMaxMultiChannel_1Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); 
++ ++template LayerTestResult<float, 4> ++ReduceMaxMultiChannel_2Test<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template LayerTestResult<float, 4> ++ReduceMaxMultiBatchAndChannelTest<armnn::DataType::Float32>( ++ armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +diff --git a/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp +new file mode 100644 +index 0000000..f6691aa +--- /dev/null ++++ b/src/backends/backendsCommon/test/layerTests/ReduceMaxTestImpl.hpp +@@ -0,0 +1,29 @@ ++// ++// Copyright © 2019 Arm Ltd. All rights reserved. ++// SPDX-License-Identifier: MIT ++// ++ ++#pragma once ++ ++#include "LayerTestResult.hpp" ++ ++#include <ResolveType.hpp> ++ ++#include <armnn/backends/IBackendInternal.hpp> ++#include <backendsCommon/WorkloadFactory.hpp> ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceMaxSimpleTest(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceMaxMultiChannel_1Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceMaxMultiChannel_2Test(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); ++ ++template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> ++LayerTestResult<float, 4> ReduceMaxMultiBatchAndChannelTest(armnn::IWorkloadFactory& workloadFactory, ++ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); +diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp +index a5b1f9d..3ce4fe2 100644 +--- a/src/backends/reference/test/RefLayerTests.cpp ++++ b/src/backends/reference/test/RefLayerTests.cpp +@@ -1980,6 +1980,12 @@ ARMNN_AUTO_TEST_CASE(Neg3dQuantisedAsymm8, Neg3dTest<DataType::QAsymmU8>) + ARMNN_AUTO_TEST_CASE(Neg2dQuantisedSymm16, Neg2dTest<DataType::QSymmS16>) + ARMNN_AUTO_TEST_CASE(Neg3dQuantisedSymm16, Neg3dTest<DataType::QSymmS16>) + ++// ReduceMax ++ARMNN_AUTO_TEST_CASE(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceMaxMultiChannelFloat32, ReduceMaxMultiChannel_1Test<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceMaxMultiChannel2Float32, ReduceMaxMultiChannel_2Test<DataType::Float32>) ++ARMNN_AUTO_TEST_CASE(ReduceMaxMultiBatchAndChannelFloat32, ReduceMaxMultiBatchAndChannelTest<DataType::Float32>) ++ + // ReduceSum + ARMNN_AUTO_TEST_CASE(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>) + ARMNN_AUTO_TEST_CASE(ReduceSumMultiChannelFloat32, ReduceSumMultiChannel_1Test<DataType::Float32>) +-- +2.7.4 + diff --git a/packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch b/packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch new file mode 100644 index 000000000..5aefaca18 --- /dev/null +++ b/packaging/0008-armnnTfLiteParser-Add-Division-op-support.patch @@ -0,0 +1,98 @@ +From a5302d101304c0d62ce87bb0cae98190badece93 Mon Sep 17 00:00:00 2001 +From: Inki Dae 
<inki.dae@samsung.com> +Date: Tue, 8 Sep 2020 12:25:40 +0900 +Subject: [PATCH 08/10] armnnTfLiteParser: Add Division op support + +Change-Id: Ib4bd0238f2cf19103e17a9d4fe03a30ab2615aa8 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + include/armnn/DescriptorsFwd.hpp | 1 + + src/armnnTfLiteParser/TfLiteParser.cpp | 40 ++++++++++++++++++++++++++++++++++ + src/armnnTfLiteParser/TfLiteParser.hpp | 1 + + 3 files changed, 42 insertions(+) + +diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp +index 3b736f9..3b0f01f 100644 +--- a/include/armnn/DescriptorsFwd.hpp ++++ b/include/armnn/DescriptorsFwd.hpp +@@ -36,6 +36,7 @@ struct ResizeDescriptor; + struct ReverseV2Descriptor; + struct ReduceMaxDescriptor; + struct ReduceSumDescriptor; ++struct DivisionDescriptor; + struct SoftmaxDescriptor; + struct SpaceToBatchNdDescriptor; + struct SpaceToDepthDescriptor; +diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp +index 05a15e5..da30ac8 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.cpp ++++ b/src/armnnTfLiteParser/TfLiteParser.cpp +@@ -535,6 +535,7 @@ TfLiteParser::TfLiteParser(const Optional<ITfLiteParser::TfLiteParserOptions>& o + m_ParserFunctions[tflite::BuiltinOperator_ARG_MAX] = &TfLiteParser::ParseArgMax; + m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MAX] = &TfLiteParser::ParseReduceMax; + m_ParserFunctions[tflite::BuiltinOperator_SUM] = &TfLiteParser::ParseSum; ++ m_ParserFunctions[tflite::BuiltinOperator_DIV] = &TfLiteParser::ParseDiv; + + // register supported custom operators + m_CustomParserFunctions["TFLite_Detection_PostProcess"] = &TfLiteParser::ParseDetectionPostProcess; +@@ -2899,6 +2900,45 @@ void TfLiteParser::ParseSum(size_t subgraphIndex, size_t operatorIndex) + RegisterOutputSlots(subgraphIndex, operatorIndex, layer, outputTensorIndexes); + } + ++void TfLiteParser::ParseDiv(size_t subgraphIndex, size_t operatorIndex) ++{ ++ const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex]; ++ const auto *options = operatorPtr->builtin_options.AsDivOptions(); ++ ++ CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ++ ++ auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(inputs.size(), 2); ++ ++ auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex); ++ CHECK_VALID_SIZE(outputs.size(), 1); ++ ++ armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); ++ armnn::TensorInfo input1TensorInfo = ToTensorInfo(inputs[1]); ++ ++ auto layerName = boost::str(boost::format("Div:%1%:%2%") % subgraphIndex % operatorIndex); ++ ++ IConnectableLayer* layer = m_Network->AddDivisionLayer(layerName.c_str()); ++ ++ TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]); ++ layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); ++ ++ auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ if (inputTensorInfo.GetNumDimensions() != input1TensorInfo.GetNumDimensions()) ++ { ++ AddBroadcastReshapeLayer(subgraphIndex, operatorIndex, layer); ++ } ++ else ++ { ++ RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0], inputTensorIndexes[1]}); ++ } ++ ++ layer = AddFusedActivationLayer(layer, 0, options->fused_activation_function); ++ ++ auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex)); ++ RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]}); ++} ++ + armnn::IConnectableLayer* 
TfLiteParser::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer, + unsigned int outputSlot, + tflite::ActivationFunctionType activationType) +diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp +index da635ae..691716b 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.hpp ++++ b/src/armnnTfLiteParser/TfLiteParser.hpp +@@ -137,6 +137,7 @@ private: + void ParseArgMax(size_t subgraphIndex, size_t operatorIndex); + void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex); + void ParseSum(size_t subgraphIndex, size_t operatorIndex); ++ void ParseDiv(size_t subgraphIndex, size_t operatorIndex); + + void RegisterProducerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IOutputSlot* slot); + void RegisterConsumerOfTensor(size_t subgraphIndex, size_t tensorIndex, armnn::IInputSlot* slot); +-- +2.7.4 + diff --git a/packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch b/packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch new file mode 100644 index 000000000..1d0bebea0 --- /dev/null +++ b/packaging/0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch @@ -0,0 +1,40 @@ +From 7ca13b0bc9daf0cbe379727d4662c2f7fbc25164 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Wed, 9 Sep 2020 14:58:51 +0900 +Subject: [PATCH 09/10] tfLiteParser: Fix axis value for Pack op + +Change-Id: I53bcdf193cfac4f8ca9d157943a9d1687164f862 +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + src/armnnTfLiteParser/TfLiteParser.cpp | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp +index da30ac8..f51ffc7 100644 +--- a/src/armnnTfLiteParser/TfLiteParser.cpp ++++ b/src/armnnTfLiteParser/TfLiteParser.cpp +@@ -2372,12 +2372,19 @@ void TfLiteParser::ParsePack(size_t subgraphIndex, size_t operatorIndex) + const auto& operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex]; + const auto* options = operatorPtr->builtin_options.AsPackOptions(); + ++ armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); ++ ++ uint32_t updated_axis = 0; ++ if (options->axis != 0) ++ updated_axis = inputTensorInfo.GetNumDimensions(); ++ else ++ updated_axis = inputTensorInfo.GetNumDimensions() - 1; ++ + StackDescriptor desc; +- desc.m_Axis = static_cast<uint32_t>(options->axis); ++ desc.m_Axis = updated_axis; + desc.m_NumInputs = static_cast<uint32_t>(inputs.size()); + + // Use the tensor shape of the first input as the "correct" input shape in the descriptor +- armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); + desc.m_InputShape = inputTensorInfo.GetShape(); + + auto layerName = boost::str(boost::format("Pack:%1%:%2%") % subgraphIndex % operatorIndex); +-- +2.7.4 + diff --git a/packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch b/packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch new file mode 100644 index 000000000..49202b0db --- /dev/null +++ b/packaging/0010-backends-Skip-ArgMax-op-for-GpuAcc.patch @@ -0,0 +1,39 @@ +From e389db5b0ba3a8a58a7426f53d32047a46448aa9 Mon Sep 17 00:00:00 2001 +From: Inki Dae <inki.dae@samsung.com> +Date: Wed, 9 Sep 2020 15:17:35 +0900 +Subject: [PATCH 10/10] backends: Skip ArgMax op for GpuAcc + +With ArgMax op for GpuAcc CL kernel compiling failed. +So skip using this op and try to use the one for CpuRef +in case of GPU type. + +This is a workaround patch so should be fixed with generic way. 
+ +Change-Id: I2993be977b7227322d0a446da88506fa60f28e4c +Signed-off-by: Inki Dae <inki.dae@samsung.com> +--- + src/backends/backendsCommon/WorkloadFactory.cpp | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp +index 639e95c..3c8cb35 100644 +--- a/src/backends/backendsCommon/WorkloadFactory.cpp ++++ b/src/backends/backendsCommon/WorkloadFactory.cpp +@@ -62,6 +62,14 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, + return false; + } + ++#if 1 // Workaround. ++ if ((backendId == armnn::Compute::GpuAcc) && ++ (layer.GetNameStr().find("ArgMax") != std::string::npos)) { ++ std::cout << __func__ << " => Skipped " << layer.GetName() << std::endl; ++ return false; ++ } ++#endif ++ + auto backendFactory = backendRegistry.GetFactory(backendId); + auto backendObject = backendFactory(); + auto layerSupportObject = backendObject->GetLayerSupport(); +-- +2.7.4 + diff --git a/packaging/armnn.spec b/packaging/armnn.spec index 9ff541622..b112d4b86 100644 --- a/packaging/armnn.spec +++ b/packaging/armnn.spec @@ -8,6 +8,18 @@ Source0: %{name}-%{version}.tar.gz Source1001: %{name}.manifest ExclusiveArch: %arm aarch64 +# Patch set for hand gesture model support from AIC +Source2001: 0001-backens-reference-Add-ArgMinMax-op-support.patch +Source2002: 0002-backends-reference-Add-ReduceSum-op-support.patch +Source2003: 0003-backends-test-Add-ReduceSum-test-cases.patch +Source2004: 0004-backends-reference-Add-ReverseV2-op-support.patch +Source2005: 0005-backends-test-Add-ReverseV2-op-test-cases.patch +Source2006: 0006-backends-reference-Add-ReduceMax-op-support.patch +Source2007: 0007-backends-test-Add-ReduceMax-op-test-cases.patch +Source2008: 0008-armnnTfLiteParser-Add-Division-op-support.patch +Source2009: 0009-tfLiteParser-Fix-axis-value-for-Pack-op.patch +Source2010: 0010-backends-Skip-ArgMax-op-for-GpuAcc.patch + %define TF_LITE_SUPPORT 1 %define TF_SUPPORT 0 %define CAFFE_SUPPORT 0 @@ -63,6 +75,28 @@ Summary: Sample application and benchmark binaries to test ARM Neural Network Li %setup -q cp %{SOURCE1001} . +cp %{SOURCE2001} . +cp %{SOURCE2002} . +cp %{SOURCE2003} . +cp %{SOURCE2004} . +cp %{SOURCE2005} . +cp %{SOURCE2006} . +cp %{SOURCE2007} . +cp %{SOURCE2008} . +cp %{SOURCE2009} . +cp %{SOURCE2010} . + +patch -p1 < %{SOURCE2001} +patch -p1 < %{SOURCE2002} +patch -p1 < %{SOURCE2003} +patch -p1 < %{SOURCE2004} +patch -p1 < %{SOURCE2005} +patch -p1 < %{SOURCE2006} +patch -p1 < %{SOURCE2007} +patch -p1 < %{SOURCE2008} +patch -p1 < %{SOURCE2009} +patch -p1 < %{SOURCE2010} + %build #compile proto files |
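For reference, once the patch set above is applied in %prep and the package is rebuilt, the newly supported ops (ArgMax, ReduceMax, ReduceSum, ReverseV2, Div) are reached through the ordinary ArmNN TfLite parser and runtime path. The following is a minimal illustrative sketch and is not part of this commit: the model file name "hand_gesture.tflite" and the tensor binding names "input"/"output" are placeholders, and CpuRef is listed last so that layers a GPU backend rejects (such as ArgMax, per patch 0010 above) fall back to the reference backend.

// Hypothetical usage sketch; file name and tensor names are placeholders.
#include <armnn/ArmNN.hpp>
#include <armnnTfLiteParser/ITfLiteParser.hpp>

#include <vector>

int main()
{
    using namespace armnn;

    // Parse the TFLite model into an ArmNN network.
    auto parser = armnnTfLiteParser::ITfLiteParser::Create();
    INetworkPtr network = parser->CreateNetworkFromBinaryFile("hand_gesture.tflite");

    IRuntime::CreationOptions options;
    IRuntimePtr runtime = IRuntime::Create(options);

    // Prefer accelerated backends; unsupported layers fall back to CpuRef.
    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc, Compute::CpuRef };
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    NetworkId netId = 0;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Binding names below are placeholders; real names come from the model.
    auto inputBinding  = parser->GetNetworkInputBindingInfo(0, "input");
    auto outputBinding = parser->GetNetworkOutputBindingInfo(0, "output");

    std::vector<float> inputData(inputBinding.second.GetNumElements(), 0.0f);
    std::vector<float> outputData(outputBinding.second.GetNumElements());

    InputTensors  inputTensors  { { inputBinding.first,
                                    ConstTensor(inputBinding.second, inputData.data()) } };
    OutputTensors outputTensors { { outputBinding.first,
                                    Tensor(outputBinding.second, outputData.data()) } };

    // Run one inference; outputData then holds the model result.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
    return 0;
}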