Diffstat (limited to 'runtimes/pure_arm_compute/src')
-rw-r--r-- runtimes/pure_arm_compute/src/compilation.cc | 2644
-rw-r--r-- runtimes/pure_arm_compute/src/compilation.h | 30
-rw-r--r-- runtimes/pure_arm_compute/src/event.h | 9
-rw-r--r-- runtimes/pure_arm_compute/src/execution.cc | 26
-rw-r--r-- runtimes/pure_arm_compute/src/execution.h | 52
-rw-r--r-- runtimes/pure_arm_compute/src/internal/FeatureSink.h | 36
-rw-r--r-- runtimes/pure_arm_compute/src/internal/FeatureSource.h | 33
-rw-r--r-- runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h | 17
-rw-r--r-- runtimes/pure_arm_compute/src/internal/MatrixSink.h | 20
-rw-r--r-- runtimes/pure_arm_compute/src/internal/MatrixSource.h | 23
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Model.cc | 25
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Model.h | 245
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Sink.h | 16
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Sinks.h | 35
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Source.h | 17
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Swizzle.h | 33
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Tensor3DSink.h | 23
-rw-r--r-- runtimes/pure_arm_compute/src/internal/Tensor3DSource.h | 23
-rw-r--r-- runtimes/pure_arm_compute/src/internal/TensorSource.h | 32
-rw-r--r-- runtimes/pure_arm_compute/src/internal/VectorSink.h | 25
-rw-r--r-- runtimes/pure_arm_compute/src/internal/VectorSource.h | 22
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute.h | 123
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc | 165
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h | 170
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h | 61
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h | 42
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h | 36
-rw-r--r-- runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h | 37
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h | 25
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc | 8
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h | 33
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc | 4
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h | 27
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc | 78
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc | 181
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h | 51
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h | 25
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc | 110
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h | 51
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc | 79
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h | 93
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc | 110
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h | 51
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc | 137
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc | 140
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h | 46
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc | 75
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h (renamed from runtimes/pure_arm_compute/src/internal/layers/PadLayer.h) | 80
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc | 77
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h | 51
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc | 172
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h | 44
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc | 53
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h | 39
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc | 142
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h | 50
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc | 73
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h | 35
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc | 155
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h | 59
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc | 74
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h | 51
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc | 40
-rw-r--r-- runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h | 35
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h | 28
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h | 61
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h | 38
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h | 36
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h | 36
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h | 62
-rw-r--r-- runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h | 49
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Abs.cc | 59
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Abs.h | 68
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Add.h | 47
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ArgMax.cc | 64
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ArgMax.h | 70
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h | 110
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc | 63
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h | 83
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Cast.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Concat.h | 46
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Conv2D.h | 110
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h | 70
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h | 114
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Dequantize.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Div.h | 47
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h | 45
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Equal.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Equal.h | 83
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Exp.cc | 63
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Exp.h | 69
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Floor.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/FullyConnected.h | 52
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Gather.h | 48
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc | 120
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h | 165
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc | 16
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/L2Normalization.h | 59
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc | 248
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h | 328
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc | 64
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h | 73
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h | 83
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc | 60
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LogicalNot.h | 82
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/LogicalOr.h | 83
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Logistic.h | 42
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Lstm.h | 91
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h | 110
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Mean.h | 47
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Mul.h | 45
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Neg.cc | 63
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Neg.h | 69
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Node.h | 20
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h | 372
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/NotEqual.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/NotEqual.h | 83
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/PReLU.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/PReLU.h | 109
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Pack.cc | 69
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Pack.h | 72
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Pad.cc | 126
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Pad.h | 176
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/RSQRT.h | 42
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReLU.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReLU1.h | 43
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReLU6.h | 41
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReduceMax.h | 45
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReduceMin.h | 107
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc | 65
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ReduceSum.h | 70
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Reshape.h | 46
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h | 47
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Rnn.h | 56
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SQRT.cc | 62
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SQRT.h | 105
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Softmax.h | 45
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc | 67
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h | 71
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h | 45
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Split.h | 46
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc | 20
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h | 61
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Squeeze.h | 46
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/StridedSlice.h | 56
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Sub.h | 48
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Tanh.h | 44
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/TopKV2.h | 47
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Transpose.h | 45
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc | 74
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/TransposeConv.h | 74
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Unpack.cc | 68
-rw-r--r-- runtimes/pure_arm_compute/src/internal/op/Unpack.h | 72
-rw-r--r-- runtimes/pure_arm_compute/src/logging.h | 21
-rw-r--r-- runtimes/pure_arm_compute/src/memory.cc | 4
-rw-r--r-- runtimes/pure_arm_compute/src/memory.h | 31
-rw-r--r-- runtimes/pure_arm_compute/src/model.cc | 223
-rw-r--r-- runtimes/pure_arm_compute/src/model.h | 30
164 files changed, 12047 insertions, 1703 deletions
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc
index 2488332a6..bed42529b 100644
--- a/runtimes/pure_arm_compute/src/compilation.cc
+++ b/runtimes/pure_arm_compute/src/compilation.cc
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file compilation.cc
+ * @brief This file contains ANeuralNetworksCompilation APIs and related classes
+ * @ingroup COM_AI_RUNTIME
+ */
+
#include <NeuralNetworks.h>
// For CLKernelLibraryEx initialization
@@ -25,28 +31,42 @@
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/CL/functions/CLArithmeticAddition.h>
-#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h>
+#include <arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h>
+#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h>
#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
#include <arm_compute/runtime/CL/functions/CLPoolingLayer.h>
#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
+#include <arm_compute/runtime/CL/functions/CLActivationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLScale.h>
+#include <arm_compute/runtime/CL/functions/CLSpaceToBatchND.h>
+#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h>
#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
-#include <arm_compute/runtime/CL/functions/CLStridedSlice.h>
+#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h>
#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h>
#include <arm_compute/runtime/CL/functions/CLGather.h>
+#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
-#include <arm_compute/runtime/CL/functions/CLReduceMax.h>
+#include <arm_compute/runtime/CL/functions/CLArgMinMax.h>
#include <arm_compute/runtime/CL/functions/CLCast.h>
#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>
#include <arm_compute/runtime/CL/functions/CLDequantizationLayer.h>
-#include <arm_compute/runtime/CL/functions/CLReductionMean.h>
-#include <arm_compute/runtime/CL/functions/CLTranspose.h>
+#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h>
+#include <arm_compute/runtime/CL/functions/CLPermuteEx.h>
+#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
#include <arm_compute/runtime/CL/functions/CLRNNLayer.h>
#include <arm_compute/runtime/CL/functions/CLFloor.h>
#include <arm_compute/runtime/CL/functions/CLCopy.h>
-#include <arm_compute/runtime/CL/functions/CLNormalizationLayer.h>
+#include <arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h>
+#include <arm_compute/runtime/CL/functions/CLExp.h>
+#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h>
+#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
+#include <arm_compute/runtime/CL/functions/CLSquaredDifference.h>
+#include <arm_compute/runtime/CL/functions/CLNeg.h>
+#include <arm_compute/runtime/CL/functions/CLPReLU.h>
+#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
+#include <arm_compute/runtime/CL/functions/CLComparisonOp.h>
#include <arm_compute/runtime/SubTensor.h>
#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
@@ -58,7 +78,7 @@
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFloor.h>
-#include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h>
+#include <arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
#include "internal/arm_compute.h"
@@ -74,19 +94,28 @@
#include "internal/arm_compute/tensor/View.h"
#include "internal/layers/GenericReshapeLayer.h"
#include "internal/layers/SimpleArithmeticAddition.h"
+#include "internal/layers/SimplePadLayer.h"
#include "internal/layers/SimpleCastLayer.h"
+#include "internal/layers/SimpleTransposeConv.h"
#include "internal/layers/GenericFullyConnectedLayer.h"
-#include "internal/layers/PadLayer.h"
#include "internal/layers/SimpleSpaceToDepth.h"
#include "internal/layers/SimpleEmbeddingLookup.h"
-#include "internal/layers/SquaredDifferenceOperation.h"
-
-#include "util/matrix/IndexIterator.h"
-#include "util/kernel/IndexIterator.h"
-#include "util/feature/IndexIterator.h"
-#include "util/tensor/IndexIterator.h"
-
-#include <nnfw/std/memory.h>
+#include "internal/layers/SimpleDepthToSpace.h"
+#include "internal/layers/SimpleBatchToSpaceNd.h"
+#include "internal/layers/SimpleHashtableLookupLayer.h"
+#include "internal/layers/SimplePackLayer.h"
+#include "internal/layers/SimpleSpaceToBatchND.h"
+#include "internal/layers/SimpleNeg.h"
+#include "internal/layers/SimpleUnpackLayer.h"
+#include "internal/layers/SimpleSQRT.h"
+#include "internal/layers/SimpleArgMinMax.h"
+
+#include "misc/matrix/IndexIterator.h"
+#include "misc/kernel/IndexIterator.h"
+#include "misc/feature/IndexIterator.h"
+#include "misc/tensor/IndexIterator.h"
+
+#include <cpp14/memory.h>
#include "compilation.h"
#include "model.h"
@@ -154,8 +183,7 @@ Padding valid_padding(void)
return padding;
}
-Padding same_padding(const nnfw::util::feature::Shape &ifm_shape,
- const nnfw::util::feature::Shape &ofm_shape, const Stride &stride, uint32_t kw,
+Padding same_padding(const nnfw::misc::feature::Shape &ifm_shape, const Stride &stride, uint32_t kw,
uint32_t kh)
{
Padding padding;
@@ -164,13 +192,16 @@ Padding same_padding(const nnfw::util::feature::Shape &ifm_shape,
//
// SAME padding. Padding on both ends are the "same":
//
- // padding_to_beginning = total_padding / 2
- // padding_to_end = (total_padding + 1)/2.
+ // padding_to_beginning = total_padding / 2
+ // padding_to_end = (total_padding + 1)/2.
//
- const int32_t vertical_needed_input = (ofm_shape.H - 1) * stride.vertical + kh;
+ const int32_t out_size_height = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
+ const int32_t out_size_width = (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
+
+ const int32_t vertical_needed_input = (out_size_height - 1) * stride.vertical + kh;
const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
- const int32_t horizontal_needed_input = (ofm_shape.W - 1) * stride.horizontal + kw;
+ const int32_t horizontal_needed_input = (out_size_width - 1) * stride.horizontal + kw;
const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
padding.top = vertical_total_padding / 2;
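
The rewritten same_padding() above derives the output size from the input shape and stride alone, using ceiling division, instead of taking the output shape as a parameter. A minimal standalone sketch of the same arithmetic for one dimension (illustrative names, not the runtime's API):

#include <algorithm>
#include <cstdint>
#include <iostream>

struct Pad
{
  int32_t top, bottom;
};

Pad same_padding_1d(int32_t ifm, int32_t stride, int32_t k)
{
  const int32_t out = (ifm + stride - 1) / stride; // ceil(ifm / stride)
  const int32_t needed = (out - 1) * stride + k;   // input extent the kernel sweeps
  const int32_t total = std::max(0, needed - ifm); // padding required
  return Pad{total / 2, (total + 1) / 2};          // beginning gets the floor half
}

int main()
{
  const Pad p = same_padding_1d(224, 2, 3);       // out = 112, needed = 225
  std::cout << p.top << ", " << p.bottom << "\n"; // prints "0, 1"
}

For ifm = 224, stride = 2, k = 3 this gives total padding 1, split as top 0 / bottom 1, matching the "padding_to_beginning = total/2, padding_to_end = (total+1)/2" rule in the comment above.
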
@@ -208,14 +239,14 @@ using namespace std::placeholders;
template <typename T>
static void initFeatureTensor(::arm_compute::ITensor &tensor,
- const nnfw::util::feature::Shape &feature_shape,
+ const nnfw::misc::feature::Shape &feature_shape,
const uint8_t *feature_base, const size_t feature_size)
{
const ::internal::nnapi::feature::Reader<T> from{
feature_shape, reinterpret_cast<const T *>(feature_base), feature_size};
::internal::arm_compute::feature::View<T> into{&tensor};
- ::nnfw::util::feature::iterate(feature_shape)
+ ::nnfw::misc::feature::iterate(feature_shape)
<< [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
const auto value = from.at(batch, ch, row, col);
into.at(batch, ch, row, col) = value;
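
All of the init*Tensor helpers follow the same pattern: wrap the NNAPI buffer in a Reader, wrap the ACL tensor in a View, and stream a per-element lambda through iterate(shape). A self-contained toy of the iterate << lambda idiom, one-dimensional for brevity (the in-tree version hands the lambda batch/ch/row/col indices):

#include <cstdint>
#include <functional>
#include <iostream>

struct Iterate
{
  uint32_t n;
  // operator<< runs the callback once per index, mimicking the in-tree idiom
  void operator<<(const std::function<void(uint32_t)> &fn) const
  {
    for (uint32_t i = 0; i < n; ++i)
      fn(i);
  }
};

Iterate iterate(uint32_t n) { return Iterate{n}; }

int main()
{
  iterate(3) << [](uint32_t i) { std::cout << i << "\n"; }; // prints 0, 1, 2
}
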
@@ -241,29 +272,29 @@ static void initVectorTensor(::arm_compute::ITensor &tensor, const uint8_t *vec_
template <typename T>
static void initTensor3D(::arm_compute::ITensor &tensor,
- const nnfw::util::tensor::Shape &tensor_shape, const uint8_t *tensor_base,
+ const nnfw::misc::tensor::Shape &tensor_shape, const uint8_t *tensor_base,
const size_t tensor_size)
{
const ::internal::nnapi::tensor::Reader<T> from{
tensor_shape, reinterpret_cast<const T *>(tensor_base), tensor_size};
::internal::arm_compute::tensor::View<T> into{&tensor};
- ::nnfw::util::tensor::iterate(tensor_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) {
- ::nnfw::util::tensor::Index index_ACL = ::nnfw::util::tensor::copy_reverse(index_nnapi);
+ ::nnfw::misc::tensor::iterate(tensor_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) {
+ ::nnfw::misc::tensor::Index index_ACL = ::nnfw::misc::tensor::copy_reverse(index_nnapi);
into.at(index_ACL) = from.at(index_nnapi);
};
}
template <typename T>
static void initMatrixTensor(::arm_compute::ITensor &tensor,
- const nnfw::util::matrix::Shape &matrix_shape,
+ const nnfw::misc::matrix::Shape &matrix_shape,
const uint8_t *matrix_base, const size_t matrix_size)
{
const ::internal::nnapi::matrix::Reader<T> from{
matrix_shape, reinterpret_cast<const T *>(matrix_base), matrix_size};
::internal::arm_compute::matrix::View<T> into{&tensor};
- ::nnfw::util::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) {
+ ::nnfw::misc::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) {
const auto value = from.at(row, col);
into.at(row, col) = value;
};
@@ -288,34 +319,66 @@ static void initReorderVectorTensor(::arm_compute::ITensor &tensor, const uint8_
template <typename T>
static void initKernelTensor(::arm_compute::ITensor &tensor,
- const nnfw::util::kernel::Shape &kernel_shape,
+ const nnfw::misc::kernel::Shape &kernel_shape,
const uint8_t *kernel_base, const size_t kernel_size)
{
const ::internal::nnapi::kernel::Reader<T> from{
kernel_shape, reinterpret_cast<const T *>(kernel_base), kernel_size};
::internal::arm_compute::kernel::View<T> into{&tensor};
- ::nnfw::util::kernel::iterate(kernel_shape)
+ ::nnfw::misc::kernel::iterate(kernel_shape)
<< [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) {
const auto value = from.at(nth, ch, row, col);
into.at(nth, ch, row, col) = value;
};
}
+/**
+ * @brief Structure to provide interface methods of compilation plan builder
+ */
struct IPlanBuilder
{
+ /**
+ * @brief Destruct IPlanBuilder object using default destructor
+ */
virtual ~IPlanBuilder() = default;
+ /**
+ * @brief Add TensorInfo with Shape Constraints
+ * @param [in] ind Index of operand
+ * @param [in] info TensorInfo value to set to index of operand
+ * @return N/A
+ */
virtual void addShapeConstr(const ::internal::tflite::operand::Index &ind,
const ::arm_compute::TensorInfo &info) = 0;
+ /**
+ * @brief Add Subsumption constraints
+ * @param [in] ind Index of operand
+ * @param [in] base Index of base operand of Subsumption
+ * @param [in] offset Offset of Subsumption
+ * @param [in] shape Shape of Subsumption
+ * @param [in] extend_parent extend_parent value of Subsumption
+ * @return N/A
+ */
virtual void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind,
const ::internal::tflite::operand::Index &base,
const ::arm_compute::Coordinates &offset,
const ::arm_compute::TensorShape &shape,
bool extend_parent = false) = 0;
+ /**
+ * @brief Add Initializer lambda with ITensor param
+ * @param [in] ind Index of operand
+ * @param [in] initializer Initializer to add
+ * @return N/A
+ */
virtual void addInitializer(const ::internal::tflite::operand::Index &ind,
const Initializer &initializer) = 0;
- virtual void addStage(const Stage &) = 0;
+ /**
+ * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params
+ * @param [in] stage Stage to add
+ * @return N/A
+ */
+ virtual void addStage(const Stage &stage) = 0;
};
//
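
The documented IPlanBuilder methods are all deferred-work registrations: Planner visits record shape constraints, initializers, and Stage lambdas up front, and the backend replays the stages once tensors have been allocated. A self-contained toy of that record-then-replay shape (illustrative types, not the runtime's):

#include <functional>
#include <iostream>
#include <vector>

// Stand-in for the real Stage(IAllocationContext&, IExecutionBuilder&) lambda.
using Stage = std::function<void(int /* allocation ctx, simplified */)>;

struct ToyPlanBuilder
{
  std::vector<Stage> stages;
  void addStage(const Stage &s) { stages.push_back(s); } // record now
};

int main()
{
  ToyPlanBuilder builder;
  builder.addStage([](int ctx) { std::cout << "configure op on ctx " << ctx << "\n"; });
  // ... later, after tensors are allocated, the backend replays every stage:
  for (const auto &s : builder.stages)
    s(42);
}
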
@@ -333,7 +396,6 @@ private:
void appendReLU(::arm_compute::ITensor *tensor);
void appendReLU6(::arm_compute::ITensor *tensor);
void appendReLU1(::arm_compute::ITensor *tensor);
- void appendTanh(::arm_compute::ITensor *tensor);
public:
void append(FuseCode code, ::arm_compute::ITensor *tensor);
@@ -349,7 +411,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
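
The pervasive nnfw::make_unique -> nnfw::cpp14::make_unique rename in this diff reflects that std::make_unique only exists from C++14 onward, so a C++11 build carries its own backport. A sketch of what such a shim typically looks like (the in-tree version in cpp14/memory.h may differ in detail):

#include <memory>
#include <utility>

namespace cpp14_sketch
{
// C++11-compatible make_unique: perfect-forward the constructor arguments.
template <typename T, typename... Args> std::unique_ptr<T> make_unique(Args &&... args)
{
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
} // namespace cpp14_sketch

int main()
{
  auto p = cpp14_sketch::make_unique<int>(7);
  return *p - 7; // exits 0
}
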
@@ -357,7 +419,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, nullptr, act_info);
@@ -372,7 +434,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
@@ -380,7 +442,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, nullptr, act_info);
@@ -395,7 +457,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
@@ -403,7 +465,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, nullptr, act_info);
@@ -411,23 +473,6 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
}
}
-void ActivationBuilder::appendTanh(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- if (::internal::arm_compute::isGpuMode())
- {
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
-
- _builder.append("Tanh", std::move(fn));
- }
- else
- throw std::runtime_error("Not supported, yet");
-}
-
void ActivationBuilder::append(FuseCode code, ::arm_compute::ITensor *ifm_alloc)
{
switch (code)
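
Every branch above follows the same dispatch: pick the CL-prefixed arm_compute layer under isGpuMode(), the NE-prefixed one otherwise, then hand ownership to the builder. The pattern, reduced to a self-contained toy with the arm_compute types stubbed out (illustrative only):

#include <iostream>
#include <memory>

struct IFunction
{
  virtual ~IFunction() = default;
  virtual void run() = 0;
};
struct CLRelu : IFunction
{
  void run() override { std::cout << "GPU ReLU\n"; }
};
struct NERelu : IFunction
{
  void run() override { std::cout << "CPU ReLU\n"; }
};

std::unique_ptr<IFunction> make_relu(bool gpu_mode)
{
  if (gpu_mode)
    return std::unique_ptr<IFunction>(new CLRelu); // CL* path when isGpuMode()
  return std::unique_ptr<IFunction>(new NERelu);   // NE* fallback otherwise
}

int main() { make_relu(true)->run(); }
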
@@ -490,9 +535,11 @@ public:
void visit(const ::internal::tflite::op::Softmax::Node &node) override;
void visit(const ::internal::tflite::op::StridedSlice::Node &node) override;
void visit(const ::internal::tflite::op::ReduceMax::Node &node) override;
+ void visit(const ::internal::tflite::op::ReduceMin::Node &node) override;
void visit(const ::internal::tflite::op::Cast::Node &node) override;
void visit(const ::internal::tflite::op::TopKV2::Node &node) override;
void visit(const ::internal::tflite::op::Gather::Node &node) override;
+ void visit(const ::internal::tflite::op::PReLU::Node &node) override;
void visit(const ::internal::tflite::op::ReLU::Node &node) override;
void visit(const ::internal::tflite::op::ReLU1::Node &node) override;
void visit(const ::internal::tflite::op::ReLU6::Node &node) override;
@@ -504,15 +551,33 @@ public:
void visit(const ::internal::tflite::op::LSTM::Node &node) override;
void visit(const ::internal::tflite::op::Floor::Node &node) override;
void visit(const ::internal::tflite::op::Split::Node &node) override;
+ void visit(const ::internal::tflite::op::ArgMax::Node &node) override;
void visit(const ::internal::tflite::op::RSQRT::Node &node) override;
+ void visit(const ::internal::tflite::op::SQRT::Node &node) override;
void visit(const ::internal::tflite::op::Pad::Node &node) override;
void visit(const ::internal::tflite::op::SpaceToDepth::Node &node) override;
+ void visit(const ::internal::tflite::op::SpaceToBatchND::Node &node) override;
+ void visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node) override;
void visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node) override;
void visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node) override;
void visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) override;
void visit(const ::internal::tflite::op::HashtableLookup::Node &node) override;
void visit(const ::internal::tflite::op::L2Normalization::Node &node) override;
void visit(const ::internal::tflite::op::SquaredDifference::Node &node) override;
+ void visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node) override;
+ void visit(const ::internal::tflite::op::DepthToSpace::Node &node) override;
+ void visit(const ::internal::tflite::op::Unpack::Node &node) override;
+ void visit(const ::internal::tflite::op::Neg::Node &node) override;
+ void visit(const ::internal::tflite::op::Exp::Node &node) override;
+ void visit(const ::internal::tflite::op::ReduceSum::Node &node) override;
+ void visit(const ::internal::tflite::op::Equal::Node &node) override;
+ void visit(const ::internal::tflite::op::TransposeConv::Node &node) override;
+ void visit(const ::internal::tflite::op::Pack::Node &node) override;
+ void visit(const ::internal::tflite::op::Abs::Node &node) override;
+ void visit(const ::internal::tflite::op::NotEqual::Node &node) override;
+ void visit(const ::internal::tflite::op::LogicalAnd::Node &node) override;
+ void visit(const ::internal::tflite::op::LogicalNot::Node &node) override;
+ void visit(const ::internal::tflite::op::LogicalOr::Node &node) override;
private:
const ::internal::tflite::operand::Set &_ctx;
@@ -582,7 +647,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node)
// NOTE SimpleArithmeticAddition does not support broadcasting
assert(lhs_shape == rhs_shape);
- auto l = nnfw::make_unique<SimpleArithmeticAddition>();
+ auto l = nnfw::cpp14::make_unique<SimpleArithmeticAddition>();
l->configure(lhs_alloc, rhs_alloc, ofm_alloc);
@@ -592,7 +657,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto l = nnfw::make_unique<::arm_compute::CLArithmeticAddition>();
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
@@ -602,7 +667,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node)
}
else // NEON
{
- auto l = nnfw::make_unique<::arm_compute::NEArithmeticAddition>();
+ auto l = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
@@ -672,7 +737,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLArithmeticSubtraction>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtractionEx>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
@@ -682,7 +747,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node)
}
else // NEON
{
- auto fn = nnfw::make_unique<::arm_compute::NEArithmeticSubtraction>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>();
// TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
@@ -696,7 +761,6 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node)
_builder.addStage(stage);
}
-// TODO: test with scalar*scalar, tensor bigger than 3D (e.g., 4D)
void Planner::visit(const ::internal::tflite::op::Mul::Node &node)
{
const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
@@ -754,7 +818,7 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLPixelWiseMultiplication>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>();
fn->configure(CAST_CL(lhs_input_alloc), CAST_CL(rhs_input_alloc), CAST_CL(output_alloc),
1.0, // scale
@@ -765,9 +829,9 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node)
}
else // NEON
{
- auto fn = nnfw::make_unique<::arm_compute::NEPixelWiseMultiplication>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>();
- fn->configure(CAST_NE(lhs_input_alloc), CAST_NE(rhs_input_alloc), CAST_NE(output_alloc),
+ fn->configure(lhs_input_alloc, rhs_input_alloc, output_alloc,
1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
@@ -836,11 +900,11 @@ void Planner::visit(const ::internal::tflite::op::Div::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLPixelWiseDivision>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseDivision>();
- // TODO Decide scale, overflow_policy, and rounding_policy.
- // Currently, the default values are used.
- fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
+ fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
+ 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
builder.append("Div", std::move(fn));
}
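
The Div configure call above now states its conversion and rounding policies explicitly rather than relying on defaults. ConvertPolicy::SATURATE clamps out-of-range results to the destination type's limits instead of wrapping; a standalone illustration of that semantics for int8 (not arm_compute code):

#include <algorithm>
#include <cstdint>
#include <iostream>

// Saturating narrowing: values beyond int8's range clamp to [-128, 127].
int8_t saturate_to_int8(int32_t v)
{
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, v)));
}

int main()
{
  std::cout << int{saturate_to_int8(300)} << "\n"; // prints 127, not a wrapped value
}
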
@@ -975,7 +1039,7 @@ void Planner::visit(const ::internal::tflite::op::Conv2D::Implicit::Node &node)
param.stride = stride;
param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
- ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H)
+ ? same_padding(ifm_shape, stride, ker_shape.W, ker_shape.H)
: valid_padding();
param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
@@ -1255,7 +1319,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod
param.stride = stride;
param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
- ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H)
+ ? same_padding(ifm_shape, stride, ker_shape.W, ker_shape.H)
: valid_padding();
param.multipler = multiplier;
@@ -1293,7 +1357,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
conv_info, param.multipler);
@@ -1302,7 +1366,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler);
@@ -1436,7 +1500,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
conv_info, param.multipler);
@@ -1445,7 +1509,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler);
@@ -1499,7 +1563,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node)
if (from_env<bool>(std::getenv("USE_SIMPLE_CAST")))
{
// Use the CPU version of CAST operation
- auto l = nnfw::make_unique<SimpleCastLayer>();
+ auto l = nnfw::cpp14::make_unique<SimpleCastLayer>();
l->configure(input_alloc, output_alloc);
fn = std::move(l);
@@ -1508,7 +1572,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto l = nnfw::make_unique<::arm_compute::CLCast>();
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
fn = std::move(l);
@@ -1554,10 +1618,12 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &nod
// TODO Should move to the place where the operand is handled, if it is possible.
// Set Shape Constraints and TensorInfo
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
// Construct operation parameters
struct Param
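
Scale and zero point now accompany every TensorInfo in these hunks because quantized operands need both to map stored integers to real values: real = scale * (q - zero_point), per the NNAPI quantization scheme. A self-contained round trip with the usual uint8 clamping (illustrative helper names):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

float dequantize(uint8_t q, float scale, int32_t zero_point)
{
  return scale * (static_cast<int32_t>(q) - zero_point);
}

uint8_t quantize(float r, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(r / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q))); // clamp to uint8
}

int main()
{
  // 0.5 quantized with scale 0.02 and zero point 128 round-trips to ~0.5
  std::cout << dequantize(quantize(0.5f, 0.02f, 128), 0.02f, 128) << "\n";
}
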
@@ -1586,7 +1652,7 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &nod
param.stride.horizontal = hstride;
param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
- ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+ ? same_padding(ifm_shape, param.stride, kw, kh)
: valid_padding();
param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
@@ -1652,7 +1718,6 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &nod
const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
- // TODO 4D tensor (dim(0) !=1 )
const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
@@ -1669,10 +1734,12 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &nod
// TODO Should move to the place where the operand is handled, if it is possible.
// Set Shape Constraints and TensorInfo
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -1782,10 +1849,12 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &nod
// TODO Should move to the place where the operand is handled, if it is possible.
// Set Shape Constraints and TensorInfo
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -1814,7 +1883,7 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &nod
param.stride.horizontal = hstride;
param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
- ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+ ? same_padding(ifm_shape, param.stride, kw, kh)
: valid_padding();
param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
@@ -1882,7 +1951,6 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &nod
const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
- // TODO 4D tensor (dim(0) != 1)
const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
@@ -1899,10 +1967,12 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &nod
// TODO Should move to the place where the operand is handled, if it is possible.
// Set Shape Constraints and TensorInfo
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -1998,8 +2068,9 @@ void Planner::visit(const ::internal::tflite::op::Concat::Node &node)
}
// Set Shape Constraints and TensorInfo (for output)
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
// Set Shape Constraints and TensorInfo (for input)
const uint32_t coord_index = ToARMComputeAxis(input_rank, axis).value();
@@ -2060,7 +2131,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
internal::tflite::operand::Shape reshape(2);
if (input_rank == 4)
{
- nnfw::util::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature();
+ nnfw::misc::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature();
auto feature_size =
ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W;
assert(feature_size == batch_size * input_size);
@@ -2078,7 +2149,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
else if (input_rank == 2)
{
auto ifm_shape = _ctx.at(input_index).shape();
- nnfw::util::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix();
+ nnfw::misc::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix();
assert(ifm_shape.dim(0) == batch_size);
assert(ifm_shape.dim(1) == input_size);
@@ -2131,7 +2202,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
auto weight_alloc = ctx.at(::internal::tflite::operand::Index{param.weight_index});
auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
- auto fn = nnfw::make_unique<GenericFullyConnectedLayer>();
+ auto fn = nnfw::cpp14::make_unique<GenericFullyConnectedLayer>();
fn->configure(input_alloc, weight_alloc, bias_alloc, output_alloc, needs_reshape,
asTensorShape(reshape));
@@ -2154,10 +2225,12 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node)
// TODO Should move to the place where the operand is handled, if it is possible.
// Set Shape Constraints
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
struct Param
{
@@ -2181,7 +2254,7 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLScale>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
::arm_compute::InterpolationPolicy::BILINEAR,
@@ -2202,18 +2275,19 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
const ::internal::tflite::operand::Index output_index{node.param().output_index};
const ::internal::tflite::operand::Index input_index{node.param().input_index};
- // NOTE The content of a tensor specified by shape_index should be aligned with
- // output tensor shape
- // TODO Check consistency of ouput shape
+ auto input_shape = asTensorShape(_ctx.at(input_index).shape());
+ auto output_shape = asTensorShape(_ctx.at(output_index).shape());
- // TODO Re-enable this assert
- // assert((ifm_shape.C * ifm_shape.H * ifm_shape.W) == out_size);
+ assert(input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3] ==
+ output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]);
// TODO Should move to the place where the operand is handled, if it is possible.
- _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
- _ctx.at(output_index).type()));
- _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
- _ctx.at(input_index).type()));
+ _builder.addShapeConstr(output_index, asTensorInfo(output_shape, _ctx.at(output_index).type(),
+ _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index, asTensorInfo(input_shape, _ctx.at(input_index).type(),
+ _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
struct Param
{
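
The new assert in the Reshape visit encodes the operation's invariant: a reshape may rearrange dimensions but never change the total element count. The same check in isolation (illustrative):

#include <cassert>
#include <functional>
#include <numeric>
#include <vector>

// Product of all dimensions, i.e. the tensor's element count.
size_t volume(const std::vector<size_t> &dims)
{
  return std::accumulate(dims.begin(), dims.end(), size_t{1}, std::multiplies<size_t>());
}

int main()
{
  assert(volume({1, 2, 3, 4}) == volume({2, 12, 1, 1})); // 24 == 24, valid reshape
}
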
@@ -2233,7 +2307,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
if (::internal::arm_compute::isGpuMode())
{
// GenericReshape first apply NCHW->NHWC permutation, and apply reshape
- auto fn = nnfw::make_unique<GenericReshapeLayer>();
+ auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
@@ -2241,7 +2315,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
}
else
{
- auto fn = nnfw::make_unique<GenericReshapeLayer>();
+ auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>();
fn->configure(input_alloc, output_alloc);
@@ -2259,19 +2333,15 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node)
const ::internal::tflite::operand::Index output_index{node.param().output_index};
const ::internal::tflite::operand::Index input_index{node.param().input_index};
- // Currently, 3D-input with dims is tested. Note that param(). dims_index_optional is optional.
- // two generated test passed:
- // - 3D input : squeeze_float_1
- // - 2D input : squeeze_3D_float_1
- // - 4D input fails (squeeze.mod.py) -> we need general tensor support
-
- // TODO Support generic tensor shape
-
// Set Shape Constraints
- _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
- _ctx.at(output_index).type()));
- _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
- _ctx.at(input_index).type()));
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -2291,7 +2361,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLReshapeLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
@@ -2299,7 +2369,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEReshapeLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReshapeLayer>();
fn->configure(input_alloc, output_alloc);
@@ -2350,7 +2420,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLSoftmaxLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>();
fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.scale);
@@ -2358,7 +2428,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NESoftmaxLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>();
fn->configure(input_alloc, output_alloc, param.scale);
@@ -2397,14 +2467,18 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
assert(_ctx.at(startData_index).shape().rank() == 1);
assert(_ctx.at(endData_index).shape().rank() == 1);
assert(_ctx.at(stridesData_index).shape().rank() == 1);
- _builder.addShapeConstr(startData_index,
- asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()),
- _ctx.at(startData_index).type()));
+ _builder.addShapeConstr(
+ startData_index,
+ asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()), _ctx.at(startData_index).type(),
+ _ctx.at(startData_index).scale(), _ctx.at(startData_index).zeroPoint()));
_builder.addShapeConstr(endData_index, asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()),
- _ctx.at(endData_index).type()));
- _builder.addShapeConstr(stridesData_index,
- asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()),
- _ctx.at(stridesData_index).type()));
+ _ctx.at(endData_index).type(),
+ _ctx.at(endData_index).scale(),
+ _ctx.at(endData_index).zeroPoint()));
+ _builder.addShapeConstr(
+ stridesData_index,
+ asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), _ctx.at(stridesData_index).type(),
+ _ctx.at(stridesData_index).scale(), _ctx.at(stridesData_index).zeroPoint()));
// Set initializers for indices data such as order of inputData
{
@@ -2469,7 +2543,7 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLStridedSlice>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSliceEx>();
fn->configure(CAST_CL(inputData_alloc), CAST_CL(outputData_alloc), CAST_CL(startData_alloc),
CAST_CL(endData_alloc), CAST_CL(stridesData_alloc), param.beginMask,
@@ -2484,6 +2558,133 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
_builder.addStage(stage);
}
+void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node)
+{
+ VERBOSE(ReduceMin) << "Configure REDUCEMIN operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto axis_shape = _ctx.at(axis_index).shape();
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+ // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only
+ // supports cases reducing height and width or reducing depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set output's shape to be equal in dimension position of the
+ // input. But the positions of the same dimensions in the input and output may be set differently.
+ // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original
+ // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to
+ // extend it in 4 dimensions, it should be {1,1,3,5}.
+ // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of
+ // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the
+ // next operation is not desired.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ std::set<uint32_t> axis;
+ {
+ const auto ifm_rank = ifm_shape.rank();
+ switch (axis_shape.rank())
+ {
+ case 0: // scalar
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ break;
+ }
+ case 1: // vector
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+ // If axis's data does not exist as constant values and can be gotten as input data, we have
+ // to find a way to infer output shape when sinking output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported");
+ break;
+ }
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::set<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::MIN);
+
+ builder.append("ReduceMin", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
{
VERBOSE(ReduceMax) << "Configure REDUCEMAX operation" << std::endl;
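
Both reduce visits normalize axes the same way: a negative axis is wrapped by adding the input rank, then each axis is remapped with ToARMComputeAxis and collected into a std::set. A standalone sketch; the real ToARMComputeAxis mapping lives in internal/Swizzle.h and is modeled here as a simple reversal for illustration:

#include <cstdint>
#include <iostream>
#include <set>

uint32_t to_backend_axis(uint32_t rank, int32_t axis)
{
  if (axis < 0)
    axis += rank; // e.g. axis -1 on a rank-4 tensor becomes 3
  return rank - static_cast<uint32_t>(axis) - 1; // illustrative reversal only
}

int main()
{
  std::set<uint32_t> axes; // the set also deduplicates repeated axes
  for (int32_t a : {-1, 0})
    axes.insert(to_backend_axis(4, a));
  for (auto a : axes)
    std::cout << a << "\n"; // prints 0 and 3
}
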
@@ -2492,43 +2693,104 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
- // Handle special case only:
- // Input: Matrix (rank 2)
- // Output: Vector (rank 1)
- // Axis: one element (scalar or rank 1 with 1 element), constant
auto ifm_shape = _ctx.at(ifm_index).shape();
auto ofm_shape = _ctx.at(ofm_index).shape();
auto axis_shape = _ctx.at(axis_index).shape();
- assert(ofm_shape.rank() == 1);
- assert(ifm_shape.rank() == 2);
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
assert(_ctx.at(axis_index).hasData());
- assert(axis_shape.rank() == 0 || ((axis_shape.rank() == 1) && (axis_shape.dim(0) == 1)));
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+ // NOTE For 4-D tensors, if the ranks of the input and output differ, this runtime only
+ // supports reducing height and width, or reducing depth.
+ // TODO We have to support all reduction cases for dimensions up to 4.
+ // For correct permuting, the output shape would have to keep each dimension at the same
+ // position as in the input, but the same dimension may end up at different positions in
+ // the input and output.
+ // For example, {2,3,4,5} (the input shape) can be reduced to {3,5} (the output shape). The
+ // original output shape should be {1,3,1,5}, but the real output shape may be {3,5}, and
+ // simply extending it to 4 dimensions yields {1,1,3,5}.
+ // Even if the output shape were changed to {1,3,1,5}, there is another problem: the shape
+ // of the output tensor consumed by the next operation would then be {1,3,1,5}, even when
+ // that operation does not expect it.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C, or
+ // reducing H or W when the channel dimension of both ifm and ofm is 1
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
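+
+ // e.g. reducing H and W of an NHWC input {1, 4, 4, 8} yields a rank-2 output {1, 8}:
+ // dim(0) (batch) and dim(3) (depth) of the input match dim(0) and dim(1) of the output,
+ // which is exactly what the rank-2 assertion above checks.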
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ std::set<uint32_t> axis;
+ {
+ const auto ifm_rank = ifm_shape.rank();
+ switch (axis_shape.rank())
+ {
+ case 0: // scalar
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ break;
+ }
+ case 1: // vector
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
- // Note: Assume only one element in axis. It is checked by assertion above
- // TODO: handle general case
- // Axis is integer value (generally, int32)
- int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
- assert(axis_value == 1);
+ // If the axis data is not constant but arrives as input data, we would have to find a
+ // way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported");
+ break;
+ }
+ }
// Construct operation parameters
struct Param
{
int ofm_index;
int ifm_index;
-
- int32_t axis;
+ std::set<uint32_t> axis;
};
Param param;
param.ofm_index = ofm_index.asInt();
param.ifm_index = ifm_index.asInt();
- param.axis = axis_value;
+ param.axis = axis;
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
@@ -2536,9 +2798,10 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLReduceMax>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
- fn->configure(CAST_CL(ifm_alloc), param.axis, CAST_CL(ofm_alloc));
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::MAX);
builder.append("ReduceMax", std::move(fn));
}
@@ -2586,7 +2849,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node)
if (from_env<bool>(std::getenv("USE_SIMPLE_CAST")))
{
// Use the CPU version of CAST operation
- auto l = nnfw::make_unique<SimpleCastLayer>();
+ auto l = nnfw::cpp14::make_unique<SimpleCastLayer>();
l->configure(input_alloc, output_alloc);
fn = std::move(l);
@@ -2595,7 +2858,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node)
{
if (::internal::arm_compute::isGpuMode())
{
- auto l = nnfw::make_unique<::arm_compute::CLCast>();
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
fn = std::move(l);
@@ -2627,13 +2890,18 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node)
// Set shape constraints
_builder.addShapeConstr(outputValues_index,
asTensorInfo(asTensorShape(_ctx.at(outputValues_index).shape()),
- _ctx.at(outputValues_index).type()));
+ _ctx.at(outputValues_index).type(),
+ _ctx.at(outputValues_index).scale(),
+ _ctx.at(outputValues_index).zeroPoint()));
_builder.addShapeConstr(outputIndices_index,
asTensorInfo(asTensorShape(_ctx.at(outputIndices_index).shape()),
- _ctx.at(outputIndices_index).type()));
- _builder.addShapeConstr(inputData_index,
- asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()),
- _ctx.at(inputData_index).type()));
+ _ctx.at(outputIndices_index).type(),
+ _ctx.at(outputIndices_index).scale(),
+ _ctx.at(outputIndices_index).zeroPoint()));
+ _builder.addShapeConstr(
+ inputData_index,
+ asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), _ctx.at(inputData_index).type(),
+ _ctx.at(inputData_index).scale(), _ctx.at(inputData_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -2659,7 +2927,7 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLTopKV2>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>();
fn->configure(CAST_CL(input_alloc), param.k, CAST_CL(values_alloc), CAST_CL(indices_alloc));
@@ -2686,12 +2954,15 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node)
assert(_ctx.at(rhs_index).shape().rank() == 1);
// Set Shape Constraints
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()),
- _ctx.at(lhs_index).type()));
- _builder.addShapeConstr(rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()),
- _ctx.at(rhs_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(),
+ _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint()));
+ _builder.addShapeConstr(
+ rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(),
+ _ctx.at(rhs_index).scale(), _ctx.at(rhs_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -2720,7 +2991,7 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node)
{
std::unique_ptr<::arm_compute::IFunction> fn;
- auto l = nnfw::make_unique<::arm_compute::CLGather>();
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLGather>();
l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
fn = std::move(l);
builder.append("Gather", std::move(fn));
@@ -2732,6 +3003,62 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node)
_builder.addStage(stage);
}
+void Planner::visit(const ::internal::tflite::op::PReLU::Node &node)
+{
+ VERBOSE(PReLU) << "Configure PReLU operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index};
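+
+ // PReLU computes f(x) = x for x >= 0 and f(x) = alpha * x for x < 0, where the alpha
+ // tensor is broadcast against the input.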
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ _builder.addShapeConstr(alpha_index,
+ asTensorInfo(asTensorShape(_ctx.at(alpha_index).shape()),
+ _ctx.at(alpha_index).type(), _ctx.at(alpha_index).scale(),
+ _ctx.at(alpha_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int alpha_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.alpha_index = alpha_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto alpha_alloc = ctx.at(::internal::tflite::operand::Index{param.alpha_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>();
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(alpha_alloc), CAST_CL(ofm_alloc));
+ builder.append("PReLU", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
void Planner::visit(const ::internal::tflite::op::ReLU::Node &node)
{
VERBOSE(ReLU) << "Configure ReLU operation" << std::endl;
@@ -2767,7 +3094,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -2775,7 +3102,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -2821,7 +3148,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -2829,7 +3156,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -2875,7 +3202,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -2883,7 +3210,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -2902,10 +3229,12 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node)
const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
// Set shape constraints
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
struct Param
{
@@ -2927,7 +3256,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
@@ -2935,7 +3264,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
fn->configure(ifm_alloc, ofm_alloc, act_info);
@@ -2981,14 +3310,20 @@ void Planner::visit(const ::internal::tflite::op::Logistic::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
builder.append("Logistic", std::move(fn));
}
else
- throw std::runtime_error("Not supported, yet");
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, act_info);
+
+ builder.append("Logistic", std::move(fn));
+ }
};
_builder.addStage(stage);
@@ -3005,52 +3340,89 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node)
const ::internal::tflite::operand::Index keep_dims_index{node.param().keep_dims_index};
const int keep_dims = _ctx.at(keep_dims_index).asScalar<int>();
- // Set shape constraints
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
- _builder.addShapeConstr(axis_index, asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()),
- _ctx.at(axis_index).type()));
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
- // TODO keep_dims==0
- assert(keep_dims != 0);
+ // NOTE For 4-D tensors, if the ranks of the input and output differ, this runtime only
+ // supports reducing height and width, or reducing depth.
+ // TODO We have to support all reduction cases for dimensions up to 4.
+ // For correct permuting, the output shape would have to keep each dimension at the same
+ // position as in the input, but the same dimension may end up at different positions in
+ // the input and output.
+ // For example, {2,3,4,5} (the input shape) can be reduced to {3,5} (the output shape). The
+ // original output shape should be {1,3,1,5}, but the real output shape may be {3,5}, and
+ // simply extending it to 4 dimensions yields {1,1,3,5}.
+ // Even if the output shape were changed to {1,3,1,5}, there is another problem: the shape
+ // of the output tensor consumed by the next operation would then be {1,3,1,5}, even when
+ // that operation does not expect it.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C, or
+ // reducing H or W when the channel dimension of both ifm and ofm is 1
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
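+
+ // e.g. a mean over H and W of an NHWC input {1, 4, 4, 8} with keep_dims == 1 produces a
+ // {1, 1, 1, 8} output; both NNAPI axes {1, 2} are converted to their ARM Compute
+ // counterparts below.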
- // Set axis
- // TODO Other axis (Axis for width and height are currently supported.)
- // TODO Other ranks (Rank 4 is currently supported.)
- assert(_ctx.at(ifm_index).shape().rank() == 4);
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(axis_index,
+ asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()),
+ _ctx.at(axis_index).type(), _ctx.at(axis_index).scale(),
+ _ctx.at(axis_index).zeroPoint()));
- std::vector<uint32_t> axis;
+ std::set<uint32_t> axis;
{
- const auto axis_base = _ctx.at(axis_index).data().base();
- const auto axis_type = _ctx.at(axis_index).type();
- const auto axis_size = _ctx.at(axis_index).shape().asVector();
-
- // NHWC type -> WHCN type
- if (_ctx.at(ofm_index).shape().rank() == 4)
+ const auto ifm_rank = ifm_shape.rank();
+ const auto axis_shape = _ctx.at(axis_index).shape();
+ switch (axis_shape.rank())
{
- for (uint32_t n = 0; n < axis_size; ++n)
+ case 0: // scalar
{
- const ::arm_compute::Coordinates coordinate{n};
- const int32_t *from = reinterpret_cast<const int32_t *>(axis_base) + n;
- if (*from == 1)
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
{
- axis.push_back(1); // h
+ axis_value += ifm_rank;
}
- else if (*from == 2)
- {
- axis.push_back(0); // w
- }
- else if (*from < 0)
- {
- // Nothing to do
- }
- else
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ break;
+ }
+ case 1: // vector
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+ // If the axis data is not constant but arrives as input data, we would have to find a
+ // way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
{
- throw std::runtime_error{"Not supported axis"};
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
}
+ break;
}
+ default:
+ throw std::runtime_error("Not supported");
+ break;
}
}
@@ -3058,7 +3430,7 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node)
{
int ofm_index;
int ifm_index;
- std::vector<uint32_t> axis;
+ std::set<uint32_t> axis;
};
Param param;
@@ -3073,9 +3445,10 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLReductionMean>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
- fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis);
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::MEAN);
builder.append("Mean", std::move(fn));
}
@@ -3125,23 +3498,37 @@ void Planner::visit(const ::internal::tflite::op::RNN::Node &node)
num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
// Set Shape Constraints and TensorInfo
- _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
- _ctx.at(output_index).type()));
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
_builder.addShapeConstr(hidden_state_out_index,
asTensorInfo(asTensorShape(_ctx.at(hidden_state_out_index).shape()),
- _ctx.at(hidden_state_out_index).type()));
- _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
- _ctx.at(input_index).type()));
+ _ctx.at(hidden_state_out_index).type(),
+ _ctx.at(hidden_state_out_index).scale(),
+ _ctx.at(hidden_state_out_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
_builder.addShapeConstr(weights_index, asTensorInfo(asTensorShape(_ctx.at(weights_index).shape()),
- _ctx.at(weights_index).type()));
+ _ctx.at(weights_index).type(),
+ _ctx.at(weights_index).scale(),
+ _ctx.at(weights_index).zeroPoint()));
_builder.addShapeConstr(recurrent_weights_index,
asTensorInfo(asTensorShape(_ctx.at(recurrent_weights_index).shape()),
- _ctx.at(recurrent_weights_index).type()));
- _builder.addShapeConstr(bias_index, asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
- _ctx.at(bias_index).type()));
+ _ctx.at(recurrent_weights_index).type(),
+ _ctx.at(recurrent_weights_index).scale(),
+ _ctx.at(recurrent_weights_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
_builder.addShapeConstr(hidden_state_in_index,
asTensorInfo(asTensorShape(_ctx.at(hidden_state_in_index).shape()),
- _ctx.at(hidden_state_in_index).type()));
+ _ctx.at(hidden_state_in_index).type(),
+ _ctx.at(hidden_state_in_index).scale(),
+ _ctx.at(hidden_state_in_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -3215,13 +3602,13 @@ void Planner::visit(const ::internal::tflite::op::LSTM::Node &node)
void Planner::visit(const ::internal::tflite::op::Transpose::Node &node)
{
VERBOSE(Transpose) << "Configure Transpose operation" << std::endl;
- // Transpose supports only height-wight dimention support.
- // CLPermute can be used to implement generic transpose along any axis
- // But CLPermute only implements [2,0,1], [1,2,0], [3,2,0,1]
- // TODO Implement other permutation CLPermute function and provide generic transpose
const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index permu_index{node.param().permu_index};
+
+ assert(_ctx.at(ifm_index).shape().rank() == _ctx.at(ofm_index).shape().rank());
+ assert(_ctx.at(permu_index).hasData() == true);
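+
+ // NNAPI Transpose semantics: ofm.dim(i) == ifm.dim(perm[i]); the constant permutation
+ // vector is converted to an ARM Compute PermutationVector when the stage runs.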
// Set shape constraints
_builder.addShapeConstr(
@@ -3230,30 +3617,41 @@ void Planner::visit(const ::internal::tflite::op::Transpose::Node &node)
_builder.addShapeConstr(
ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
_ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
- // NNAPI spec provides permutation vector for generic transpose
- // TODO Make the permutation vector a part of Param
+
struct Param
{
int ofm_index;
int ifm_index;
+ const int32_t *pv;
+ int rank;
};
Param param;
param.ofm_index = ofm_index.asInt();
param.ifm_index = ifm_index.asInt();
+ param.pv = reinterpret_cast<const int32_t *>(_ctx.at(permu_index).data().base());
+ param.rank = _ctx.at(ifm_index).shape().rank();
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
const auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
- // CLTranspose assumes only spatial transpose, will be replaced with CLPermute
- // TODO Check the validity of permutation vector, then call CLPermute with permu vector
- auto fn = nnfw::make_unique<::arm_compute::CLTranspose>();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermuteEx>();
- fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
+ getARMComputePermutationVector(param.rank, param.pv));
+
+ builder.append("Transpose", std::move(fn));
+ }
+ else
+ {
+ throw std::runtime_error("Not supported, yet");
+ }
- builder.append("Transpose", std::move(fn));
};
_builder.addStage(stage);
@@ -3267,10 +3665,12 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
const ::internal::tflite::operand::Index ifm_index{node.param().input_index};
// Set shape constraints
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
struct Param
{
@@ -3289,7 +3689,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLFloor>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
@@ -3297,7 +3697,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NEFloor>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>();
fn->configure(ifm_alloc, ofm_alloc);
@@ -3308,11 +3708,367 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
_builder.addStage(stage);
}
+void Planner::visit(const ::internal::tflite::op::ArgMax::Node &node)
+{
+ VERBOSE(ArgMax) << "Configure ARGMAX operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto axis_shape = _ctx.at(axis_index).shape();
+
+ assert(_ctx.at(axis_index).hasData());
+ // The axis operand is always a 1-D tensor.
+ assert(axis_shape.rank() == 1);
+ assert(ifm_shape.rank() == ofm_shape.rank());
+
+ _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false),
+ _ctx.at(ofm_index).type()));
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false),
+ _ctx.at(ifm_index).type()));
+
+ std::vector<uint32_t> l_axis;
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+ auto axis_base = _ctx.at(axis_index).data().base();
+ auto axis_type = _ctx.at(axis_index).type();
+ // TODO Should support axis size > 1.
+ assert(axis_size == 1);
+ // axis is a rank-1 tensor, i.e. always a vector.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_shape.rank();
+ }
+ l_axis.push_back(ToARMComputeAxis(ifm_shape.rank(), axis_value).value());
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::vector<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = l_axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (from_env<bool>(std::getenv("USE_SIMPLE_ARGMINMAX")))
+ {
+ // USE CPU VERSION OF ARGMAX
+ auto fn = nnfw::cpp14::make_unique<SimpleArgMinMax>();
+
+ fn->configure(ifm_alloc, ofm_alloc, param.axis, ::arm_compute::ArgOperation::MAX);
+
+ builder.append("ArgMax", std::move(fn));
+ }
+ else
+ {
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgMinMax>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ArgOperation::MAX);
+
+ builder.append("ArgMax", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::SQRT::Node &node)
+{
+ VERBOSE(SQRT) << "Configure SQRT operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+
+ if (from_env<bool>(std::getenv("USE_SIMPLE_SQRT")))
+ {
+ // USE CPU VERSION OF SQRT
+ auto fn = nnfw::cpp14::make_unique<SimpleSQRT>();
+
+ fn->configure(input_alloc, output_alloc);
+
+ builder.append("SQRT", std::move(fn));
+ }
+ else
+ {
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info);
+
+ builder.append("SQRT", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(input_alloc, output_alloc, act_info);
+
+ builder.append("SQRT", std::move(fn));
+ }
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
void Planner::visit(const ::internal::tflite::op::RSQRT::Node &node)
{
VERBOSE(RSQRT) << "Configure Rsqrt operation" << std::endl;
- throw std::runtime_error("Not supported, yet");
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ const ::arm_compute::ActivationLayerInfoEx act_info{
+ ::arm_compute::ActivationLayerInfoEx::ActivationFunction::RSQRT};
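+
+ // RSQRT computes f(x) = 1 / sqrt(x) element-wise.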
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayerEx>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info);
+
+ builder.append("RSQRT", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Equal::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
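+
+ // e.g. comparing a {3} tensor with a {2, 3} tensor extends both operands to rank 2, the
+ // former becoming {1, 3}, assuming extendRank prepends singleton dimensions.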
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparisonOp>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::ComparisonOperation::EQUAL);
+
+ builder.append("Equal", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node)
+{
+ VERBOSE(TransposeConv) << "Configure TransposeConv operation" << std::endl;
+
+ const ::internal::tflite::operand::Index op_shape_index{node.param().op_shape_index};
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+
+ // Only 4D tensors are supported
+ assert(_ctx.at(ofm_index).shape().rank() == 4);
+ assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
+ assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+
+ assert(_ctx.at(padding_index).hasData() == true);
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ assert(vstride > 0);
+ assert(hstride > 0);
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+ assert(ifm_shape.N == ofm_shape.N);
+ assert(ifm_shape.C == ker_shape.C);
+ assert(ker_shape.N == ofm_shape.C);
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(),
+ _ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int ker_index;
+ Padding padding;
+ Stride stride;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.ker_index = ker_index.asInt();
+
+ param.stride.horizontal = hstride;
+ param.stride.vertical = vstride;
+
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ifm_shape, param.stride, ker_shape.W, ker_shape.H)
+ : valid_padding();
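+
+ // Under the TFLite transpose-conv convention, SAME padding yields an output spatial size
+ // of input * stride, while VALID yields input * stride + max(kernel - stride, 0).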
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+
+ auto fn = nnfw::cpp14::make_unique<SimpleTransposeConv>();
+
+ // Only rank 4 is supported
+ const int rank = 4;
+
+ auto tconv_info = asPadStrideInfo(param.padding, param.stride);
+
+ fn->configure(ifm_alloc, ker_alloc, ofm_alloc, tconv_info, getARMComputeAxises(rank));
+
+ builder.append("TransposeConv", std::move(fn));
+ };
+ _builder.addStage(stage);
}
void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
@@ -3320,7 +4076,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index};
const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index};
- const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
// Set Shape Constraints and TensorInfo
_builder.addShapeConstr(
@@ -3349,8 +4104,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
int ofm_index;
int lhs_index;
int rhs_index;
-
- FuseCode activation;
};
Param param;
@@ -3359,8 +4112,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
param.lhs_index = lhs_index.asInt();
param.rhs_index = rhs_index.asInt();
- param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
-
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
@@ -3368,26 +4119,17 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<SquaredDifferenceOperation>();
-
- // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0,
- ::arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSquaredDifference>();
+ fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
builder.append("SquaredDifference", std::move(fn));
}
- else // NEON
+ else
{
- auto fn = nnfw::make_unique<SquaredDifferenceOperation>();
-
- // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0,
- ::arm_compute::RoundingPolicy::TO_ZERO);
-
- builder.append("SquaredDifference", std::move(fn));
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
}
- ActivationBuilder{builder}.append(param.activation, ofm_alloc);
};
_builder.addStage(stage);
@@ -3446,55 +4188,87 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node)
const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
- const auto paddings_shape = _ctx.at(paddings_index).shape().asTensor();
+ assert(_ctx.at(paddings_index).hasData() == true);
// Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(ifm_index,
+ asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false),
+ _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(),
+ _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(ofm_index,
+ asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false),
+ _ctx.at(ofm_index).type(), _ctx.at(ofm_index).scale(),
+ _ctx.at(ofm_index).zeroPoint()));
_builder.addShapeConstr(
- ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
- _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
- _builder.addShapeConstr(
- ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
- _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
- _builder.addShapeConstr(
- paddings_index,
- asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape()), _ctx.at(paddings_index).type(),
- _ctx.at(paddings_index).scale(), _ctx.at(paddings_index).zeroPoint()));
+ paddings_index, asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape(), false),
+ _ctx.at(paddings_index).type(), _ctx.at(paddings_index).scale(),
+ _ctx.at(paddings_index).zeroPoint()));
+
+ // initializer for padding
+ {
+ auto pad_type = _ctx.at(paddings_index).type();
+
+ if (pad_type == ANEURALNETWORKS_TENSOR_INT32)
+ {
+ auto pad_base = _ctx.at(paddings_index).data().base();
+ auto pad_size = _ctx.at(paddings_index).data().size();
+ auto pad_shape = _ctx.at(paddings_index).shape().asMatrix();
+
+ // Padding is supported for height and width only.
+ auto initializer = std::bind(initMatrixTensor<int32_t>, _1, pad_shape, pad_base, pad_size);
+ _builder.addInitializer(paddings_index, initializer);
+ }
+ else
+ {
+ throw std::runtime_error("Only Int32 datatype is supported for Pad values");
+ }
+ }
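+
+ // The paddings operand is a {rank, 2} matrix of (before, after) pairs per input dimension;
+ // e.g. {{0, 0}, {1, 1}, {2, 2}, {0, 0}} pads H by 1 and W by 2 on each side of an NHWC
+ // input.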
// Construct operation parameters
struct Param
{
int ofm_index;
int ifm_index;
- int32_t padding_size;
+ int padding_index;
};
Param param;
param.ofm_index = ofm_index.asInt();
param.ifm_index = ifm_index.asInt();
-
- assert(_ctx.at(paddings_index).hasData() == true);
-
- // TODO: Currently we are supporting uniform padding for the tensor, so only a single
- // value is being read. (TOP = BOTTOM = LEFT = RIGHT).
- // Need to read padding values for all the sides (TOP, BOTTOM, LEFT & RIGHT)
-
- const auto &padding_data = _ctx.at(paddings_index).data();
- auto base = padding_data.base();
- auto padsize = reinterpret_cast<const int *>(base) + 3;
- param.padding_size = *padsize;
+ param.padding_index = paddings_index.asInt();
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto pad_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_index});
- auto fn = nnfw::make_unique<PadLayer>();
+ if (from_env<bool>(std::getenv("USE_SIMPLE_PAD")))
+ {
+ // USE CPU VERSION OF PADLAYER
+ auto rank = 4;
+ auto fn = nnfw::cpp14::make_unique<SimplePadLayer>();
- fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.padding_size);
- builder.append("Pad", std::move(fn));
+ fn->configure(ifm_alloc, ofm_alloc, pad_alloc, getARMComputeAxises(rank));
+ builder.append("PAD", std::move(fn));
+ }
+ else
+ {
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayerEx>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), CAST_CL(pad_alloc));
+
+ builder.append("PAD", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
};
_builder.addStage(stage);
@@ -3506,6 +4280,21 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node)
const ::internal::tflite::operand::Index input_index{node.param().input_index};
const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+ const auto input_batch = _ctx.at(input_index).shape().dim(0);
+ const auto output_batch = _ctx.at(output_index).shape().dim(0);
+ const auto input_depth = _ctx.at(input_index).shape().dim(3);
+ const auto output_depth = _ctx.at(output_index).shape().dim(3);
+ const auto block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+ const auto input_height = _ctx.at(input_index).shape().dim(1);
+ const auto input_width = _ctx.at(input_index).shape().dim(2);
+
+ // All assertions as per NNAPI specification.
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+ assert((block_size >= 1) && (input_height % block_size == 0) && (input_width % block_size == 0));
+ assert(input_batch == output_batch);
+ assert(input_depth * block_size * block_size == output_depth);
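+
+ // e.g. with block_size == 2, an NHWC input {1, 4, 4, 2} produces a {1, 2, 2, 8} output:
+ // height and width shrink by the block size while depth grows by its square.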
+
// Set Shape Constraints and TensorInfo
_builder.addShapeConstr(output_index,
asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
@@ -3528,17 +4317,284 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node)
param.output_index = output_index.asInt();
param.input_index = input_index.asInt();
- param.block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+ param.block_size = block_size;
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
- auto rank = 4;
- auto fn = nnfw::make_unique<SimpleSpaceToDepth>();
+ if (from_env<bool>(std::getenv("USE_SIMPLE_SPACETODEPTH")))
+ {
+ // USE CPU VERSION OF SPACETODEPTH
+ auto rank = 4;
+ auto fn = nnfw::cpp14::make_unique<SimpleSpaceToDepth>();
+
+ fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank));
- fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank));
- builder.append("SpaceToDepth", std::move(fn));
+ builder.append("SpaceToDepth", std::move(fn));
+ }
+ else
+ {
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
+
+ builder.append("SpaceToDepth", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+ const ::internal::tflite::operand::Index padding_size_index{node.param().padding_size_index};
+
+ { // New block for assertions
+
+ // Currently, only 4-D NHWC input/output tensors are supported.
+ // The 4-D array needs to have exactly 2 spatial dimensions.
+ // TODO: Support arbitrary dimensions in SpaceToBatchND.
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+ assert(_ctx.at(block_size_index).shape().rank() == 1);
+ assert(_ctx.at(padding_size_index).shape().rank() == 2);
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &block_size_shape = _ctx.at(block_size_index).shape();
+ const auto &padding_size_shape = _ctx.at(padding_size_index).shape();
+
+ assert(output_shape.dim(3) == input_shape.dim(3));
+ assert(block_size_shape.dim(0) == 2);
+ assert(padding_size_shape.dim(0) == 2);
+ assert(padding_size_shape.dim(1) == 2);
+ }
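+
+ // e.g. with block sizes {2, 2} and zero padding, a {1, 4, 4, 1} input becomes a
+ // {4, 2, 2, 1} output: batch grows by the product of the block sizes while each spatial
+ // dimension shrinks accordingly.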
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ _builder.addShapeConstr(block_size_index,
+ asTensorInfo(asTensorShape(_ctx.at(block_size_index).shape()),
+ _ctx.at(block_size_index).type(),
+ _ctx.at(block_size_index).scale(),
+ _ctx.at(block_size_index).zeroPoint()));
+
+ _builder.addShapeConstr(padding_size_index,
+ asTensorInfo(asTensorShape(_ctx.at(padding_size_index).shape()),
+ _ctx.at(padding_size_index).type(),
+ _ctx.at(padding_size_index).scale(),
+ _ctx.at(padding_size_index).zeroPoint()));
+
+ if (_ctx.at(block_size_index).hasData())
+ {
+ const auto rank = _ctx.at(input_index).shape().rank();
+ const auto num_of_block_size = _ctx.at(block_size_index).shape().asVector();
+ auto block_size_base = _ctx.at(block_size_index).data().base();
+ auto block_size_type = _ctx.at(block_size_index).type();
+
+ switch (block_size_type)
+ {
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = [block_size_base, num_of_block_size,
+ rank](::arm_compute::ITensor &tensor) {
+ assert(num_of_block_size < 4);
+ for (size_t n = 0; n < num_of_block_size; ++n)
+ {
+ const int32_t *from = reinterpret_cast<const int32_t *>(block_size_base) + n;
+ int32_t *into = reinterpret_cast<int32_t *>(
+ tensor.ptr_to_element({ToARMComputeAxis(rank, n + 1).value()}));
+ *into = *from;
+ }
+ };
+ _builder.addInitializer(block_size_index, initializer);
+
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported");
+ }
+ }
+ }
+
+ if (_ctx.at(padding_size_index).hasData())
+ {
+ const auto padding_size_shape = _ctx.at(padding_size_index).shape();
+ const auto rank = _ctx.at(input_index).shape().rank();
+ auto padding_size_base = _ctx.at(padding_size_index).data().base();
+ auto padding_size_type = _ctx.at(padding_size_index).type();
+
+ switch (padding_size_type)
+ {
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = [padding_size_base, padding_size_shape,
+ rank](::arm_compute::ITensor &tensor) {
+ assert(padding_size_shape.dim(1) == 2);
+ assert(padding_size_shape.dim(0) < 4);
+ for (size_t n = 0; n < padding_size_shape.dim(0); ++n)
+ {
+ const int32_t *from = reinterpret_cast<const int32_t *>(padding_size_base) +
+ (n * padding_size_shape.dim(1));
+ int32_t *into = reinterpret_cast<int32_t *>(
+ tensor.ptr_to_element({0, ToARMComputeAxis(rank, n + 1).value()}));
+ into[0] = from[0];
+ into[1] = from[1];
+ }
+ };
+ _builder.addInitializer(padding_size_index, initializer);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported");
+ }
+ }
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ int block_size_index;
+ int padding_size_index;
+ int32_t rank;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size_index = block_size_index.asInt();
+ param.padding_size_index = padding_size_index.asInt();
+ param.rank = _ctx.at(input_index).shape().rank();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+ auto block_size_alloc = ctx.at(::internal::tflite::operand::Index{param.block_size_index});
+ auto padding_size_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_size_index});
+
+ // NOTE SimpleSpaceToBatchND is quite slow
+ if (from_env<bool>(std::getenv("USE_SIMPLE_SPACE_TO_BATCH_ND")))
+ {
+ auto fn = nnfw::cpp14::make_unique<SimpleSpaceToBatchND>();
+
+ fn->configure(input_alloc, block_size_alloc, padding_size_alloc, output_alloc);
+ builder.append("SpaceToBatchND", std::move(fn));
+ }
+ else if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(padding_size_alloc),
+ CAST_CL(output_alloc));
+ builder.append("SpaceToBatchND", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+ assert(_ctx.at(block_size_index).hasData() == true);
+
+ const int32_t *block_size =
+ reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &input_shape = _ctx.at(input_index).shape();
+
+ assert((_ctx.at(block_size_index).data().size() / sizeof(int32_t)) == 2 && block_size[0] > 0 &&
+ block_size[1] > 0);
+ {
+ assert(output_shape.dim(3) == input_shape.dim(3));
+ assert(output_shape.dim(1) == input_shape.dim(1) * block_size[0]);
+ assert(output_shape.dim(2) == input_shape.dim(2) * block_size[1]);
+ assert(output_shape.dim(0) == input_shape.dim(0) / (block_size[0] * block_size[1]));
+ }
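+
+ // e.g. with block sizes {2, 2}, a {4, 2, 2, 1} input becomes a {1, 4, 4, 1} output, the
+ // inverse of SpaceToBatchND, exactly as the assertions above encode.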
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ output_index, asTensorInfo(asTensorShape(output_shape, false), _ctx.at(output_index).type(),
+ _ctx.at(output_index).scale(), _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(
+ input_index, asTensorInfo(asTensorShape(input_shape, false), _ctx.at(input_index).type(),
+ _ctx.at(input_index).scale(), _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ const int32_t *block_size;
+ int32_t rank;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size = block_size;
+ param.rank = _ctx.at(input_index).shape().rank();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ // NOTE SimpleBatchToSpaceND is quite slow, but may be useful for debugging
+ if (from_env<bool>(std::getenv("USE_SIMPLE_BATCH_TO_SPACE_ND")))
+ {
+ auto fn = nnfw::cpp14::make_unique<SimpleBatchToSpaceND>();
+
+ fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(param.rank));
+ builder.append("BatchToSpaceND", std::move(fn));
+ }
+ else if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceND>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
+ builder.append("BatchToSpaceND", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
};
@@ -3550,9 +4606,6 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
- const auto ofm_shape = _ctx.at(ofm_index).shape();
- const auto ifm_shape = _ctx.at(ifm_index).shape();
-
// Set Shape Constraints and TensorInfo
_builder.addShapeConstr(
ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
@@ -3583,10 +4636,10 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
param.ofm_index = ofm_index.asInt();
param.ifm_index = ifm_index.asInt();
- param.radius = 2 * ifm_shape.dim(3) + 1; // normSize = depth * 2 + 1
- param.alpha = 1.0f; // In the implementation to make alpha_ become 1
- param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- param.bias = 0.0f; // Don't offset the reduction.
+ param.radius = 2 * _ctx.at(ifm_index).shape().dim(3) + 1; // normSize = depth * 2 + 1
+  param.alpha = 1.0f;  // Chosen so that alpha_ becomes 1 in the implementation
+ param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ param.bias = 0.0f; // Don't offset the reduction.
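+  // With alpha = 1, beta = 0.5 and bias = 0, the cross-map normalization
+  //   out = in / (bias + alpha * sum(in^2))^beta
+  // reduces to out = in / sqrt(sum(in^2)), i.e. L2 normalization over depth
+  // (assuming the implementation applies alpha unscaled, as arranged above)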
auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
@@ -3598,7 +4651,7 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayerEx>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
@@ -3606,9 +4659,9 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
}
else
{
- auto fn = nnfw::make_unique<::arm_compute::NENormalizationLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayerEx>();
- fn->configure(CAST_NE(ifm_alloc), CAST_NE(ofm_alloc), norm_info);
+ fn->configure(ifm_alloc, ofm_alloc, norm_info);
builder.append("L2Normalize", std::move(fn));
}
@@ -3647,10 +4700,12 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node
assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
(ANEURALNETWORKS_PADDING_VALID == padding_type));
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
struct Param
{
@@ -3678,7 +4733,7 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node
param.stride.horizontal = hstride;
param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
- ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+ ? same_padding(ifm_shape, param.stride, kw, kh)
: valid_padding();
param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
@@ -3731,9 +4786,6 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node
const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
-
const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
@@ -3745,10 +4797,12 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node
const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
- _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
- _ctx.at(ofm_index).type()));
- _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
- _ctx.at(ifm_index).type()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
// Construct operation parameters
struct Param
@@ -3876,11 +4930,27 @@ void Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node)
auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index});
auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index});
- auto fn = nnfw::make_unique<SimpleEmbeddingLookup>();
+ if (from_env<bool>(std::getenv("USE_SIMPLE_EMBEDDINGLOOKUP")))
+ {
+ auto fn = nnfw::cpp14::make_unique<SimpleEmbeddingLookup>();
- fn->configure(lookups_alloc, values_alloc, output_alloc);
+ fn->configure(lookups_alloc, values_alloc, output_alloc);
- builder.append("EmbeddingLookup", std::move(fn));
+ builder.append("EmbeddingLookup", std::move(fn));
+ }
+ else if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>();
+
+ fn->configure(CAST_CL(values_alloc), CAST_CL(output_alloc), CAST_CL(lookups_alloc));
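+    // NOTE CLEmbeddingLookup's configure takes (values, output, lookups),
+    // unlike SimpleEmbeddingLookup's (lookups, values, output) order above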
+
+ builder.append("EmbeddingLookup", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
};
_builder.addStage(stage);
@@ -3888,8 +4958,857 @@ void Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node)
void Planner::visit(const ::internal::tflite::op::HashtableLookup::Node &node)
{
- // TODO Implement HashtableLookup
- throw std::runtime_error("Not supported");
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index hits_index{node.param().hits_index};
+ const ::internal::tflite::operand::Index lookups_index{node.param().lookups_index};
+ const ::internal::tflite::operand::Index values_index{node.param().values_index};
+ const ::internal::tflite::operand::Index keys_index{node.param().keys_index};
+
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &keys_obj = _ctx.at(keys_index);
+ const auto &hits_obj = _ctx.at(hits_index);
+ const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = _ctx.at(output_index);
+
+ assert(lookups_obj.type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(keys_obj.type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(hits_obj.type() == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM);
+
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &keys_shape = keys_obj.shape();
+ const auto &hits_shape = hits_obj.shape();
+ const auto &values_shape = values_obj.shape();
+ const auto &output_shape = output_obj.shape();
+
+ assert(values_shape.rank() == output_shape.rank());
+
+ assert(lookups_shape.rank() == 1);
+ assert(keys_shape.rank() == 1);
+ assert(values_shape.dim(0) == keys_shape.dim(0));
+ assert(lookups_shape.dim(0) == output_shape.dim(0));
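+  // e.g. with n lookups over m keys: lookups {n}, keys {m}, values {m, d}
+  // yield output {n, d}; hits {n} flags whether each lookup key was found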
+
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(hits_index,
+ asTensorInfo(asTensorShape(_ctx.at(hits_index).shape()),
+                                       _ctx.at(hits_index).type(), _ctx.at(hits_index).scale(),
+ _ctx.at(hits_index).zeroPoint()));
+
+ _builder.addShapeConstr(lookups_index, asTensorInfo(asTensorShape(_ctx.at(lookups_index).shape()),
+ _ctx.at(lookups_index).type(),
+ _ctx.at(lookups_index).scale(),
+ _ctx.at(lookups_index).zeroPoint()));
+ _builder.addShapeConstr(values_index,
+ asTensorInfo(asTensorShape(_ctx.at(values_index).shape()),
+ _ctx.at(values_index).type(), _ctx.at(values_index).scale(),
+ _ctx.at(values_index).zeroPoint()));
+ _builder.addShapeConstr(keys_index,
+ asTensorInfo(asTensorShape(_ctx.at(keys_index).shape()),
+ _ctx.at(keys_index).type(), _ctx.at(keys_index).scale(),
+ _ctx.at(keys_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int32_t output_index;
+ int32_t hits_index;
+ int32_t lookups_index;
+ int32_t values_index;
+ int32_t keys_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.hits_index = hits_index.asInt();
+ param.lookups_index = lookups_index.asInt();
+ param.values_index = values_index.asInt();
+ param.keys_index = keys_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto hits_alloc = ctx.at(::internal::tflite::operand::Index{param.hits_index});
+ auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index});
+ auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index});
+ auto keys_alloc = ctx.at(::internal::tflite::operand::Index{param.keys_index});
+
+ if (from_env<bool>(std::getenv("USE_SIMPLE_HASHTABLELOOKUP")))
+ {
+ auto fn = nnfw::cpp14::make_unique<SimpleHashtableLookupLayer>();
+
+ fn->configure(lookups_alloc, keys_alloc, values_alloc, output_alloc, hits_alloc);
+
+ builder.append("HashtableLookup", std::move(fn));
+ }
+ else if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>();
+
+ fn->configure(CAST_CL(lookups_alloc), CAST_CL(keys_alloc), CAST_CL(values_alloc),
+ CAST_CL(output_alloc), CAST_CL(hits_alloc));
+
+ builder.append("HashtableLookup", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index radius_index{node.param().radius_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+ const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index};
+ const ::internal::tflite::operand::Index beta_index{node.param().beta_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int32_t radius;
+ float bias;
+ float alpha;
+ float beta;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.radius = _ctx.at(radius_index).asScalar<int32_t>();
+ param.alpha = _ctx.at(alpha_index).asScalar<float>();
+ param.beta = _ctx.at(beta_index).asScalar<float>();
+ param.bias = _ctx.at(bias_index).asScalar<float>();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const auto norm_info =
+ ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, param.radius,
+ param.alpha, param.beta, param.bias, false);
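+    // The trailing 'false' (is_scaled) leaves alpha unscaled by the window size,
+    // matching the NNAPI LRN formula out = in / (bias + alpha * sum(in^2))^beta
+    // (assuming that is ACL's meaning of the flag)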
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayerEx>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
+
+ builder.append("LocalResponseNormalization", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayerEx>();
+
+ fn->configure(ifm_alloc, ofm_alloc, norm_info);
+
+ builder.append("LocalResponseNormalization", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+
+ int32_t block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+ assert(block_size > 0);
+
+ { // assertions block
+ const auto output_shape = _ctx.at(output_index).shape();
+ const auto input_shape = _ctx.at(input_index).shape();
+ assert(output_shape.dim(0) == input_shape.dim(0));
+ assert(output_shape.dim(1) == input_shape.dim(1) * block_size);
+ assert(output_shape.dim(2) == input_shape.dim(2) * block_size);
+ assert(input_shape.dim(3) % (block_size * block_size) == 0);
+ assert(output_shape.dim(3) == input_shape.dim(3) / (block_size * block_size));
+ }
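+  // e.g. (assuming NHWC layout) input {1, 2, 2, 8} with block_size 2 yields
+  // output {1, 4, 4, 2}: depth shrinks by block_size^2 while height and width
+  // grow by block_size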
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ int32_t block_size;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size = block_size;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ if (from_env<bool>(std::getenv("USE_SIMPLE_DEPTHTOSPACE")))
+ {
+      // Use the CPU (simple) version of DepthToSpace
+ auto rank = 4;
+ auto fn = nnfw::cpp14::make_unique<SimpleDepthToSpace>();
+
+ fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank));
+
+ builder.append("DepthToSpace", std::move(fn));
+ }
+ else
+ {
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
+
+ builder.append("DepthToSpace", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Unpack::Node &node)
+{
+ VERBOSE(Unpack) << "Configure Unpack operation" << std::endl;
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ uint32_t input_rank = _ctx.at(ifm_index).shape().rank();
+
+ assert(input_rank == 4 || input_rank == 3 || input_rank == 2);
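+  // UNPACK splits the input along 'axis' into one output per slice; each
+  // output has the input's shape with the unpacked dimension removed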
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
+ _ctx.at(ifm_index).type()));
+
+ int32_t axis =
+ _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>();
+ // int32_t num_split =
+ // _ctx.at(::internal::tflite::operand::Index{node.param().num_split_index}).asScalar<int32_t>();
+
+ for (const auto &index : node.param().ofm_indexes)
+ {
+ const ::internal::tflite::operand::Index ofm_index{index};
+ _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
+ _ctx.at(ofm_index).type()));
+ }
+
+ struct Param
+ {
+ std::vector<int32_t> ofm_indexes;
+ int ifm_index;
+ int axis;
+ };
+
+ if (input_rank == 4)
+ {
+ Param param;
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis;
+ for (const auto &index : node.param().ofm_indexes)
+ {
+ param.ofm_indexes.push_back(index);
+ }
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<SimpleUnpackLayer>();
+ std::vector<::arm_compute::ICLTensor *> outputs;
+ for (const auto &index : param.ofm_indexes)
+ {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{index});
+ outputs.push_back(CAST_CL(output_alloc));
+ }
+ fn->configure(CAST_CL(input_alloc), outputs, param.axis);
+
+ builder.append("Unpack", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+ }
+ else if (input_rank == 3)
+ {
+    // TODO: Generate a test case for this and generalize the 4D method to all cases.
+ throw std::runtime_error("UNPACK_3D not implemented");
+ }
+ else if (input_rank == 2)
+ {
+ throw std::runtime_error("UNPACK_2D not implemented");
+ }
+ else
+ {
+    throw std::runtime_error("UNPACK input rank is not valid");
+ }
+}
+
+void Planner::visit(const ::internal::tflite::op::Pack::Node &node)
+{
+ VERBOSE(Pack) << "Configure Pack operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const uint32_t output_rank = _ctx.at(ofm_index).shape().rank();
+ const uint32_t input_rank = output_rank - 1;
+
+ assert(output_rank == 4 || output_rank == 3 || output_rank == 2);
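+  // PACK stacks the rank-(output_rank - 1) inputs along 'axis', adding one
+  // dimension; every input must therefore have the same shape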
+
+ for (const auto &index : node.param().ifm_indexes)
+ {
+ const ::internal::tflite::operand::Index ifm_index{index};
+ assert(_ctx.at(ifm_index).shape().rank() == input_rank);
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
+ _ctx.at(ifm_index).type()));
+ }
+
+ _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
+ _ctx.at(ofm_index).type()));
+
+ int32_t axis =
+ _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>();
+
+ struct Param
+ {
+ std::vector<int32_t> ifm_indexes;
+ int ofm_index;
+ int axis;
+ };
+
+ if (input_rank == 3)
+ {
+ Param param;
+ param.ofm_index = ofm_index.asInt();
+ param.axis = axis;
+
+ // TODO: Fix this once all permutations are present.
+ if (param.axis != 0)
+ {
+      throw std::runtime_error("This axis is not supported; some 4D permutations are missing");
+ }
+
+ for (const auto &index : node.param().ifm_indexes)
+ {
+ param.ifm_indexes.push_back(index);
+ }
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<SimplePackLayer>();
+ std::vector<::arm_compute::ICLTensor *> inputs;
+ for (const auto &index : param.ifm_indexes)
+ {
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{index});
+ inputs.push_back(CAST_CL(input_alloc));
+ }
+ fn->configure(inputs, CAST_CL(output_alloc), param.axis);
+
+ builder.append("Pack", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+ }
+ else if (input_rank == 2)
+ {
+    // TODO: Generate a test case for this and generalize the 4D method to all cases.
+ throw std::runtime_error("PACK_2D not implemented");
+ }
+ else if (input_rank == 1)
+ {
+ throw std::runtime_error("PACK_1D not implemented");
+ }
+ else
+ {
+    throw std::runtime_error("PACK input rank is not valid");
+ }
+}
+
+void Planner::visit(const ::internal::tflite::op::Neg::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ // NOTE SimpleNeg is quite slow, but may be useful for debugging
+ if (from_env<bool>(std::getenv("USE_SIMPLE_NEG")))
+ {
+ auto fn = nnfw::cpp14::make_unique<SimpleNeg>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+ builder.append("Neg", std::move(fn));
+ }
+ else if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
+ builder.append("Neg", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Exp::Node &node)
+{
+ VERBOSE(Exp) << "Configure Exp operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExp>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
+
+ builder.append("Exp", std::move(fn));
+ }
+ else
+ {
+ throw std::runtime_error("Not supported");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node)
+{
+ VERBOSE(ReduceSum) << "Configure ReduceSum operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+ const auto axis_shape = _ctx.at(axis_index).shape();
+
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+  // NOTE For 4-dimensional input, if the ranks of input and output differ, this runtime only
+  // supports the cases of reducing height and width, or reducing depth.
+  // TODO Support all reduction cases for dimensions up to 4.
+  // For correct permutation, the output shape would have to keep each dimension at the same
+  // position it occupies in the input, but the same dimension may end up at different positions
+  // in input and output. For example, input shape {2,3,4,5} can be reduced to output shape {3,5}.
+  // With kept dimensions the output shape would be {1,3,1,5}, but the actual output shape may be
+  // {3,5}; naively extending {3,5} back to 4 dimensions yields {1,1,3,5} instead.
+  // Even if the output shape were changed to {1,3,1,5}, another problem remains: the output
+  // tensor's shape as seen by the next operation would then also be {1,3,1,5}, even when the next
+  // operation does not expect that.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+      assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+              ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+             (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+              (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+              ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ uint32_t input_rank = ifm_shape.rank();
+ std::set<uint32_t> axis;
+ int32_t axis_rank = axis_shape.rank();
+
+ if (axis_rank == 0)
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += input_rank;
+ }
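+    // e.g. with input_rank == 4, an axis of -1 wraps to 3 before being
+    // converted to the corresponding ARM Compute axis below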
+ axis.insert(ToARMComputeAxis(input_rank, axis_value).value());
+ }
+ else if (axis_rank == 1)
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+    // If the axis values are not constant data but instead arrive as input data at execution
+    // time, we would need a way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += input_rank;
+ }
+ axis.insert(ToARMComputeAxis(input_rank, axis_value).value());
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Not supported axis");
+ }
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::set<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::SUM);
+
+ builder.append("ReduceSum", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Abs::Node &node)
+{
+ // TODO Implement Abs op
+ throw std::runtime_error("Not supported yet");
+}
+
+void Planner::visit(const ::internal::tflite::op::NotEqual::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
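+  // e.g. comparing shapes {3} and {2, 3}: both operands are extended to the
+  // broadcast rank 2, so {3} becomes {1, 3} (extended dimensions are filled
+  // with 1) and can broadcast against {2, 3}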
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparisonOp>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::ComparisonOperation::NOT_EQUAL);
+
+ builder.append("NotEqual", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LogicalAnd::Node &node)
+{
+ VERBOSE(Logical_AND) << "Configure Logical_AND operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::BinaryLogicalOperation::AND);
+
+ builder.append("LogicalAnd", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LogicalNot::Node &node)
+{
+ // TODO Implement LogicalNot op
+ throw std::runtime_error("Not supported yet");
+}
+
+void Planner::visit(const ::internal::tflite::op::LogicalOr::Node &node)
+{
+ VERBOSE(LogicalOr) << "Configure LogicalOr operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
+
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::BinaryLogicalOperation::OR);
+
+ builder.append("LogicalOr", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
}
class AllocationContext final : public IAllocationContext
@@ -3939,32 +5858,69 @@ private:
::internal::arm_compute::Plan &_plan;
};
+/**
+ * @brief Class to build a compilation plan
+ */
class PlanBuilder final : public IPlanBuilder
{
public:
+ /**
+ * @brief Construct a new PlanBuilder object with Plan
+ * @param [in] plan The Plan object
+ */
PlanBuilder(::internal::arm_compute::Plan &plan) : _plan{plan}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Add TensorInfo with Shape Constraints
+ * @param [in] ind Index of operand
+ * @param [in] info TensorInfo value to set to index of operand
+ * @return N/A
+ */
void addShapeConstr(const ::internal::tflite::operand::Index &ind,
const ::arm_compute::TensorInfo &info) override;
public:
+ /**
+ * @brief Add Subsumption constraints
+ * @param [in] ind Index of operand
+ * @param [in] base Index of base operand of Subsumption
+ * @param [in] offset Offset of Subsumption
+ * @param [in] shape Shape of Subsumption
+ * @param [in] extend_parent extend_parent value of Subsumption
+ * @return N/A
+ */
void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind,
const ::internal::tflite::operand::Index &base,
const ::arm_compute::Coordinates &offset,
const ::arm_compute::TensorShape &shape, bool extend_parent) override;
public:
+ /**
+ * @brief Add Initializer lambda with ITensor param
+ * @param [in] ind Index of operand
+ * @param [in] initializer Initializer to add
+ * @return N/A
+ */
void addInitializer(const ::internal::tflite::operand::Index &ind,
const Initializer &initializer) override;
public:
+ /**
+ * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params
+ * @param [in] stage Stage to add
+ * @return N/A
+ */
void addStage(const Stage &stage) override;
public:
+ /**
+   * @brief Finalize (build) the Plan
+ * @return N/A
+ */
void finalize(void) const;
private:
@@ -4197,6 +6153,8 @@ void PlanBuilder::finalize(void) const
auto type = operands.at(operand_idx).type();
auto shape = operands.at(operand_idx).shape();
+  // Scalar types (ANEURALNETWORKS_FLOAT32 and ANEURALNETWORKS_INT32) must also be handled
+  // for rank > 1 tensors, because a scalar can be an operand of a broadcast operation
switch (rank)
{
case 0: // scalar
@@ -4240,12 +6198,14 @@ void PlanBuilder::finalize(void) const
auto size = shape.asVector();
switch (type)
{
+ case ANEURALNETWORKS_FLOAT32:
case ANEURALNETWORKS_TENSOR_FLOAT32:
{
auto initializer = std::bind(initVectorTensor<float>, _1, base, size);
_plan.operands().at(operand_idx).access(initializer);
break;
}
+ case ANEURALNETWORKS_INT32:
case ANEURALNETWORKS_TENSOR_INT32:
{
auto initializer = std::bind(initVectorTensor<int32_t>, _1, base, size);
@@ -4270,12 +6230,14 @@ void PlanBuilder::finalize(void) const
auto size = operands.at(operand_idx).data().size();
switch (type)
{
+ case ANEURALNETWORKS_FLOAT32:
case ANEURALNETWORKS_TENSOR_FLOAT32:
{
auto initializer = std::bind(initMatrixTensor<float>, _1, matrix_shape, base, size);
_plan.operands().at(operand_idx).access(initializer);
break;
}
+ case ANEURALNETWORKS_INT32:
case ANEURALNETWORKS_TENSOR_INT32:
{
auto initializer = std::bind(initMatrixTensor<int32_t>, _1, matrix_shape, base, size);
@@ -4300,12 +6262,14 @@ void PlanBuilder::finalize(void) const
auto size = operands.at(operand_idx).data().size();
switch (type)
{
+ case ANEURALNETWORKS_FLOAT32:
case ANEURALNETWORKS_TENSOR_FLOAT32:
{
auto initializer = std::bind(initTensor3D<float>, _1, tensor_shape, base, size);
_plan.operands().at(operand_idx).access(initializer);
break;
}
+ case ANEURALNETWORKS_INT32:
case ANEURALNETWORKS_TENSOR_INT32:
{
auto initializer = std::bind(initTensor3D<int32_t>, _1, tensor_shape, base, size);
@@ -4330,12 +6294,14 @@ void PlanBuilder::finalize(void) const
auto size = operands.at(operand_idx).data().size();
switch (type)
{
+ case ANEURALNETWORKS_FLOAT32:
case ANEURALNETWORKS_TENSOR_FLOAT32:
{
auto initializer = std::bind(initFeatureTensor<float>, _1, feature_shape, base, size);
_plan.operands().at(operand_idx).access(initializer);
break;
}
+ case ANEURALNETWORKS_INT32:
case ANEURALNETWORKS_TENSOR_INT32:
{
auto initializer =
@@ -4417,8 +6383,10 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
if (::internal::arm_compute::isGpuMode())
{
arm_compute::CLScheduler::get().default_init();
- arm_compute::CLKernelLibraryEx::get().init("./cl_kernels/", cl::Context::getDefault(),
- cl::Device::getDefault());
+    // NOTE CLKernelLibraryEx must use the same context as CLScheduler.
+    // This does not check whether another device is available.
+ arm_compute::CLKernelLibraryEx::get().init(
+ "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
}
const auto &operands = compilation->plan().model().operands();
diff --git a/runtimes/pure_arm_compute/src/compilation.h b/runtimes/pure_arm_compute/src/compilation.h
index dd3613b2d..1a06d06b9 100644
--- a/runtimes/pure_arm_compute/src/compilation.h
+++ b/runtimes/pure_arm_compute/src/compilation.h
@@ -14,15 +14,28 @@
* limitations under the License.
*/
+/**
+ * @file compilation.h
+ * @brief This file defines ANeuralNetworksCompilation class for handling Compilation NNAPI
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __COMPILATION_H__
#define __COMPILATION_H__
#include "internal/Model.h"
#include "internal/arm_compute.h"
+/**
+ * @brief struct to define Compilation of NNAPI
+ */
struct ANeuralNetworksCompilation
{
public:
+ /**
+ * @brief Construct with params
+ * @param [in] model Pointer of internal::tflite::Model to set internal::arm_compute::Plan
+ */
ANeuralNetworksCompilation(const std::shared_ptr<const internal::tflite::Model> &model)
: _plan{new internal::arm_compute::Plan{model}}
{
@@ -30,11 +43,28 @@ public:
}
public:
+ /**
+ * @brief Get reference of internal::arm_compute::Plan
+ * @return Reference of internal::arm_compute::Plan
+ */
internal::arm_compute::Plan &plan(void) { return *_plan; }
public:
+ /**
+ * @brief Publish internal Plan to param
+ * @param [out] plan Pointer of internal::arm_compute::Plan to be set
+ * @return N/A
+ */
void publish(std::shared_ptr<const internal::arm_compute::Plan> &plan) { plan = _plan; }
+ /**
+   * @brief Check whether ANeuralNetworksCompilation_finish has been called
+ * @return @c true if ANeuralNetworksCompilation_finish has been called, otherwise @c false
+ */
bool isFinished(void) { return _isFinished; }
+ /**
+ * @brief Mark compilation process finished
+ * @return N/A
+ */
void markAsFinished() { _isFinished = true; }
private:
diff --git a/runtimes/pure_arm_compute/src/event.h b/runtimes/pure_arm_compute/src/event.h
index 5d41dca84..b5595583c 100644
--- a/runtimes/pure_arm_compute/src/event.h
+++ b/runtimes/pure_arm_compute/src/event.h
@@ -14,9 +14,18 @@
* limitations under the License.
*/
+/**
+ * @file event.h
+ * @brief This file defines ANeuralNetworksEvent struct for handling Event NNAPI
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __EVENT_H__
#define __EVENT_H__
+/**
+ * @brief struct to define Event of NNAPI
+ */
struct ANeuralNetworksEvent
{
};
diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc
index 778a22155..b7eba1cef 100644
--- a/runtimes/pure_arm_compute/src/execution.cc
+++ b/runtimes/pure_arm_compute/src/execution.cc
@@ -18,8 +18,8 @@
#include "compilation.h"
#include "execution.h"
-#include "util/profiling/profiling.h"
-#include "util/profiling/profiler.h"
+#include "profiling/profiling.h"
+#include "profiling/profiler.h"
#include "event.h"
#include "internal/VectorSource.h"
@@ -34,7 +34,7 @@
#include "internal/Tensor3DSink.h"
#include "internal/FeatureSink.h"
-#include "util/feature/IndexIterator.h"
+#include "misc/feature/IndexIterator.h"
#include <arm_compute/runtime/CL/CLScheduler.h>
@@ -70,7 +70,7 @@ static void asVectorSource(ANeuralNetworksExecution *execution, int32_t type, in
}
static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::matrix::Shape &shape, const void *buffer,
+ const nnfw::misc::matrix::Shape &shape, const void *buffer,
size_t length)
{
switch (type)
@@ -100,7 +100,7 @@ static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, in
}
static void asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::tensor::Shape &shape, const void *buffer,
+ const nnfw::misc::tensor::Shape &shape, const void *buffer,
size_t length)
{
switch (type)
@@ -130,7 +130,7 @@ static void asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type,
}
static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::tensor::Shape &shape, const void *buffer,
+ const nnfw::misc::tensor::Shape &shape, const void *buffer,
size_t length)
{
switch (type)
@@ -160,7 +160,7 @@ static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, in
}
static void asFeatureSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::feature::Shape &shape, const void *buffer,
+ const nnfw::misc::feature::Shape &shape, const void *buffer,
size_t length)
{
switch (type)
@@ -244,7 +244,7 @@ static void asMatrixSink(ANeuralNetworksExecution *execution, int32_t type, int3
}
static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::feature::Shape &shape, void *buffer, size_t length)
+ const nnfw::misc::feature::Shape &shape, void *buffer, size_t length)
{
switch (type)
{
@@ -272,7 +272,7 @@ static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int
}
static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::tensor::Shape &shape, void *buffer, size_t length)
+ const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length)
{
assert(shape.rank() == 3);
@@ -302,7 +302,7 @@ static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, in
}
static void asTensorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
- const nnfw::util::tensor::Shape &shape, void *buffer, size_t length)
+ const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length)
{
switch (type)
{
@@ -420,9 +420,9 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32
// squeeze(shape) eliminates all the dimensions whose dimensionality is 1
// For example, squeeze([3, 1, 3]) returns [3, 3]
-static nnfw::util::tensor::Shape squeeze(const nnfw::util::tensor::Shape &shape)
+static nnfw::misc::tensor::Shape squeeze(const nnfw::misc::tensor::Shape &shape)
{
- nnfw::util::tensor::Shape res(0);
+ nnfw::misc::tensor::Shape res(0);
for (uint32_t axis = 0; axis < shape.rank(); ++axis)
{
@@ -497,7 +497,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- const bool sync = profiling::Context::get().sync().enabled();
+ const bool sync = profiling::Context::get().sync();
const auto &plan = execution->plan();
const auto &model = plan.model();
diff --git a/runtimes/pure_arm_compute/src/execution.h b/runtimes/pure_arm_compute/src/execution.h
index c036fe2c8..f55ab3fbf 100644
--- a/runtimes/pure_arm_compute/src/execution.h
+++ b/runtimes/pure_arm_compute/src/execution.h
@@ -14,6 +14,13 @@
* limitations under the License.
*/
+/**
+ * @file execution.h
+ * @brief This file contains ANeuralNetworksExecution class for handling Execution NNAPI such as
+ * ANeuralNetworksExecution_create, ANeuralNetworksExecution_setInput
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __EXECUTION_H__
#define __EXECUTION_H__
@@ -21,9 +28,16 @@
#include "internal/Sink.h"
#include "internal/Source.h"
+/**
+ * @brief struct to express Execution of NNAPI
+ */
struct ANeuralNetworksExecution
{
public:
+ /**
+ * @brief Construct with params
+ * @param [in] plan Pointer to get internal::arm_compute::Plan
+ */
ANeuralNetworksExecution(const std::shared_ptr<const internal::arm_compute::Plan> &plan)
: _plan{plan}
{
@@ -32,31 +46,69 @@ public:
}
public:
+ /**
+ * @brief Get reference of internal::arm_compute::Plan
+ * @return Const reference of internal::arm_compute::Plan
+ */
const internal::arm_compute::Plan &plan(void) const { return *_plan; }
private:
std::shared_ptr<const internal::arm_compute::Plan> _plan;
public:
+ /**
+ * @brief Set the nth source with param
+ * @param [in] n Index of the nth source
+ * @param [in] source Pointer to set the nth source from
+ * @return N/A
+ */
// TODO Use InputIndex instead of int
void source(int n, std::unique_ptr<Source> &&source) { _sources.at(n) = std::move(source); }
+ /**
+ * @brief Set the nth source with param
+ * @param [in] n Index of the nth source
+ * @param [in] args Arguments to set the nth source from
+ * @return N/A
+ */
template <typename T, typename... Args> void source(int n, Args &&... args)
{
source(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}});
}
public:
+ /**
+ * @brief Get the nth source
+ * @param [in] n Index of the nth source
+ * @return Const reference of Source
+ */
const Source &source(int n) const { return *(_sources.at(n)); }
public:
+ /**
+ * @brief Set the nth sink with param
+ * @param [in] n Index of the nth sink
+ * @param [in] sink Pointer to set the nth sink from
+ * @return N/A
+ */
// TODO Use OutputIndex instead of int
void sink(int n, std::unique_ptr<Sink> &&sink) { _sinks.at(n) = std::move(sink); }
+ /**
+ * @brief Set the nth sink with param
+ * @param [in] n Index of the nth sink
+ * @param [in] args Arguments to set the nth sink from
+ * @return N/A
+ */
template <typename T, typename... Args> void sink(int n, Args &&... args)
{
sink(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}});
}
public:
+ /**
+ * @brief Get the nth sink
+ * @param [in] n Index of the nth sink
+ * @return Const reference of Sink
+ */
const Sink &sink(int n) const { return *(_sinks.at(n)); }
private:
diff --git a/runtimes/pure_arm_compute/src/internal/FeatureSink.h b/runtimes/pure_arm_compute/src/internal/FeatureSink.h
index 9e4412c2a..7c6884141 100644
--- a/runtimes/pure_arm_compute/src/internal/FeatureSink.h
+++ b/runtimes/pure_arm_compute/src/internal/FeatureSink.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        FeatureSink.h
+ * @brief       This file contains FeatureSink class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_FEATURE_SINK_H__
#define __INTERNAL_FEATURE_SINK_H__
@@ -21,22 +27,36 @@
#include "internal/nnapi/feature/View.h"
#include "internal/arm_compute/feature/View.h"
-#include <util/feature/Shape.h>
-#include "util/feature/IndexIterator.h"
+#include <misc/feature/Shape.h>
+#include "misc/feature/IndexIterator.h"
-//
-// FeatureSink
-//
+/**
+ * @brief Class to store Feature(4D) output data.
+ * This is for pulling data from an arm compute tensor into this sink's buffer.
+ * @tparam T Type of the data elements
+ */
template <typename T> class FeatureSink final : public Sink
{
public:
- FeatureSink(const nnfw::util::feature::Shape &shape, T *base, const size_t size)
+ /**
+ * @brief Construct a FeatureSink object
+ *
+ * @param[in] shape 4D tensor dimensions for this feature
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ FeatureSink(const nnfw::misc::feature::Shape &shape, T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Pull the data into the internal structure
+ * @param[in] tensor The tensor which contains source data
+ * @return N/A
+ */
void pull(::arm_compute::ITensor &tensor) const override
{
const ::internal::arm_compute::feature::View<T> from{&tensor};
@@ -44,7 +64,7 @@ public:
// Inevitably casting must be done.
::internal::nnapi::feature::View<T> into{_shape, _base, _size};
- ::nnfw::util::feature::iterate(_shape)
+ ::nnfw::misc::feature::iterate(_shape)
<< [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
const auto value = from.at(batch, ch, row, col);
into.at(batch, ch, row, col) = value;
@@ -52,7 +72,7 @@ public:
}
private:
- const nnfw::util::feature::Shape _shape;
+ const nnfw::misc::feature::Shape _shape;
T *const _base;
const size_t _size;
};
diff --git a/runtimes/pure_arm_compute/src/internal/FeatureSource.h b/runtimes/pure_arm_compute/src/internal/FeatureSource.h
index fca56e341..772beb701 100644
--- a/runtimes/pure_arm_compute/src/internal/FeatureSource.h
+++ b/runtimes/pure_arm_compute/src/internal/FeatureSource.h
@@ -14,31 +14,54 @@
* limitations under the License.
*/
+/**
+ * @file        FeatureSource.h
+ * @brief       This file contains FeatureSource class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_FEATURE_SOURCE_H__
#define __INTERNAL_FEATURE_SOURCE_H__
-#include <util/feature/Shape.h>
-#include <util/feature/IndexIterator.h>
+#include <misc/feature/Shape.h>
+#include <misc/feature/IndexIterator.h>
#include "internal/nnapi/feature/Reader.h"
#include "internal/arm_compute/feature/View.h"
+/**
+ * @brief Class to store feature(4D) input data.
+ * This is for pushing the data out to another tensor.
+ * @tparam T Type of the data elements
+ */
template <typename T> class FeatureSource final : public Source
{
public:
- FeatureSource(const nnfw::util::feature::Shape &shape, const T *base, const size_t size)
+ /**
+ * @brief Construct a FeatureSource object
+ *
+ * @param[in] shape 4D tensor dimensions for this feature
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ FeatureSource(const nnfw::misc::feature::Shape &shape, const T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+   * @brief Push the data out to another tensor
+   * @param[out] tensor The tensor where the output data will be stored
+ * @return N/A
+ */
void push(::arm_compute::ITensor &tensor) const override
{
const ::internal::nnapi::feature::Reader<T> from{_shape, _base, _size};
::internal::arm_compute::feature::View<T> into{&tensor};
- ::nnfw::util::feature::iterate(_shape)
+ ::nnfw::misc::feature::iterate(_shape)
<< [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
const auto value = from.at(batch, ch, row, col);
into.at(batch, ch, row, col) = value;
@@ -46,7 +69,7 @@ public:
}
private:
- const nnfw::util::feature::Shape _shape;
+ const nnfw::misc::feature::Shape _shape;
const T *const _base;
const size_t _size;
};
diff --git a/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h b/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h
index aa1e67177..2a6e2a743 100644
--- a/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h
+++ b/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file IExecutionBuilder.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines interface of ExecutionBuilder
+ */
#ifndef __INTERNAL_IEXECUTION_BUILDER_H__
#define __INTERNAL_IEXECUTION_BUILDER_H__
@@ -22,10 +27,22 @@
#include <memory>
#include <string>
+/**
+ * @brief Struct to define interface of ExecutionBuilder
+ */
struct IExecutionBuilder
{
+ /**
+ * @brief Destroy the IExecutionBuilder object
+ */
virtual ~IExecutionBuilder() = default;
+ /**
+ * @brief Append function to execute
+ * @param[in] name Name of function
+ * @param[in] f Function to append
+ * @return N/A
+ */
virtual void append(const std::string &name, std::unique_ptr<::arm_compute::IFunction> &&f) = 0;
};
diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSink.h b/runtimes/pure_arm_compute/src/internal/MatrixSink.h
index 32bd49dc6..23ecc112b 100644
--- a/runtimes/pure_arm_compute/src/internal/MatrixSink.h
+++ b/runtimes/pure_arm_compute/src/internal/MatrixSink.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file MatrixSink.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines MatrixSink class
+ */
#ifndef __INTERNAL_MATRIX_SINK_H__
#define __INTERNAL_MATRIX_SINK_H__
@@ -27,9 +32,19 @@
#include <cstring>
#include <cassert>
+/**
+ * @brief Class to get matrix data from arm compute tensor
+ */
template <typename T> class MatrixSink final : public Sink
{
public:
+ /**
+ * @brief Construct a new Matrix Sink object
+ * @param[in] H Height of matrix
+ * @param[in] W Width of matrix
+ * @param[in] base Pointer to get data
+ * @param[in] size Size of matrix
+ */
MatrixSink(const int32_t H, const int32_t W, T *base, const size_t size)
: _height{H}, _width{W}, _base{base}
{
@@ -37,6 +52,11 @@ public:
}
public:
+ /**
+ * @brief Get matrix data from arm compute tensor to base
+ * @param[in] tensor Tensor object of arm compute to get data
+ * @return N/A
+ */
void pull(::arm_compute::ITensor &tensor) const override
{
assert(tensor.info()->dimension(0) == _width);
diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSource.h b/runtimes/pure_arm_compute/src/internal/MatrixSource.h
index 2f5d92484..71d6a804f 100644
--- a/runtimes/pure_arm_compute/src/internal/MatrixSource.h
+++ b/runtimes/pure_arm_compute/src/internal/MatrixSource.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file MatrixSource.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines MatrixSource class
+ */
#ifndef __INTERNAL_MATRIX_SOURCE_H__
#define __INTERNAL_MATRIX_SOURCE_H__
@@ -23,16 +28,30 @@
#include "internal/Source.h"
+/**
+ * @brief Class to push matrix data to arm compute tensor
+ */
template <typename T> class MatrixSource final : public Source
{
public:
- MatrixSource(const nnfw::util::matrix::Shape &shape, const T *base, const size_t size)
+ /**
+ * @brief Construct a new MatrixSource object
+ * @param[in] shape Shape of matrix
+ * @param[in] base Pointer of matrix data to push
+ * @param[in] size Size of matrix
+ */
+ MatrixSource(const nnfw::misc::matrix::Shape &shape, const T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// do nothing
}
public:
+ /**
+ * @brief Push matrix data to arm compute tensor
+ * @param[out] tensor Tensor object of arm compute to push matrix data
+ * @return N/A
+ */
void push(::arm_compute::ITensor &tensor) const override
{
using ::arm_compute::Window;
@@ -55,7 +74,7 @@ public:
}
private:
- const nnfw::util::matrix::Shape _shape;
+ const nnfw::misc::matrix::Shape _shape;
const T *const _base;
const size_t _size;
};
diff --git a/runtimes/pure_arm_compute/src/internal/Model.cc b/runtimes/pure_arm_compute/src/internal/Model.cc
index 3a31f9911..03753fea2 100644
--- a/runtimes/pure_arm_compute/src/internal/Model.cc
+++ b/runtimes/pure_arm_compute/src/internal/Model.cc
@@ -16,8 +16,6 @@
#include "internal/Model.h"
-#include <cassert>
-
namespace internal
{
namespace tflite
@@ -25,7 +23,7 @@ namespace tflite
namespace operand
{
-Shape::Shape(uint32_t rank) : nnfw::util::tensor::Shape(rank)
+Shape::Shape(uint32_t rank) : nnfw::misc::tensor::Shape(rank)
{
// DO NOTHING
}
@@ -37,17 +35,17 @@ int32_t Shape::asVector(void) const
return dim(0);
}
-nnfw::util::matrix::Shape Shape::asMatrix(void) const
+nnfw::misc::matrix::Shape Shape::asMatrix(void) const
{
assert(rank() == 2);
const auto height = dim(0);
const auto width = dim(1);
- return nnfw::util::matrix::Shape(height, width);
+ return nnfw::misc::matrix::Shape(height, width);
}
-nnfw::util::feature::Shape Shape::asFeature(void) const
+nnfw::misc::feature::Shape Shape::asFeature(void) const
{
assert(rank() == 4);
@@ -62,15 +60,15 @@ nnfw::util::feature::Shape Shape::asFeature(void) const
const auto height = dim(1);
const auto width = dim(2);
- return nnfw::util::feature::Shape(batch, depth, height, width);
+ return nnfw::misc::feature::Shape(batch, depth, height, width);
}
-nnfw::util::tensor::Shape Shape::asTensor(void) const
+nnfw::misc::tensor::Shape Shape::asTensor(void) const
{
- return nnfw::util::tensor::Shape(*this); // this shape represents shape of NNAPI
+ return nnfw::misc::tensor::Shape(*this); // this shape represents shape of NNAPI
}
-nnfw::util::kernel::Shape Shape::asKernel(void) const
+nnfw::misc::kernel::Shape Shape::asKernel(void) const
{
assert(rank() == 4);
@@ -84,7 +82,7 @@ nnfw::util::kernel::Shape Shape::asKernel(void) const
const auto height = dim(1);
const auto width = dim(2);
- return nnfw::util::kernel::Shape(count, depth, height, width);
+ return nnfw::misc::kernel::Shape(count, depth, height, width);
}
// Extended dimension is filled with 1.
@@ -120,6 +118,11 @@ const Object &Set::at(const Index &index) const { return *(_objects.at(index.asI
Object &Set::at(const Index &index) { return *(_objects.at(index.asInt())); }
+bool Set::exist(const Index &index) const
+{
+ return index.asInt() >= 0 && index.asInt() < _objects.size();
+}
+
} // namespace operand
} // namespace tflite
} // namespace internal
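
As a worked example of the conversions above: a rank-4 NNAPI shape is laid out NHWC, so asFeature() takes batch from dim(0), height from dim(1), width from dim(2), and depth from dim(3). A minimal sketch, assuming the runtime's internal headers and the mutable dim() accessor of misc::tensor::Shape:

    // Editor's sketch: splitting a {1, 224, 224, 3} NNAPI operand into a feature shape.
    #include "internal/Model.h"

    void feature_shape_example(void)
    {
      internal::tflite::operand::Shape shape{4}; // rank-4, NHWC order

      shape.dim(0) = 1;   // N (batch)
      shape.dim(1) = 224; // H
      shape.dim(2) = 224; // W
      shape.dim(3) = 3;   // C (depth)

      const auto feature = shape.asFeature();
      // Expect: feature.N == 1, feature.C == 3, feature.H == 224, feature.W == 224
    }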
diff --git a/runtimes/pure_arm_compute/src/internal/Model.h b/runtimes/pure_arm_compute/src/internal/Model.h
index 33ba3a8fd..bdcf32f6f 100644
--- a/runtimes/pure_arm_compute/src/internal/Model.h
+++ b/runtimes/pure_arm_compute/src/internal/Model.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Model.h
+ * @brief This file contains classes for handling the internal Model object
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_MODEL_H__
#define __INTERNAL_MODEL_H__
@@ -24,15 +30,26 @@ namespace tflite
namespace operand
{
+/**
+ * @brief Class to express index of operand.
+ */
class Index
{
public:
+ /**
+ * @brief Construct a new Index object for operand with param.
+ * @param [in] value The index value
+ */
explicit Index(int value) : _value{value}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get index value as int
+ * @return Index value as int
+ */
int asInt(void) const { return _value; }
private:
@@ -46,10 +63,10 @@ private:
#include <vector>
#include <cstdint>
-#include "util/feature/Shape.h"
-#include "util/matrix/Shape.h"
-#include "util/kernel/Shape.h"
-#include "util/tensor/Shape.h"
+#include "misc/feature/Shape.h"
+#include "misc/matrix/Shape.h"
+#include "misc/kernel/Shape.h"
+#include "misc/tensor/Shape.h"
namespace internal
{
@@ -58,19 +75,51 @@ namespace tflite
namespace operand
{
-struct Shape : public nnfw::util::tensor::Shape
+/**
+ * @brief Class to express shape of operand.
+ */
+struct Shape : public nnfw::misc::tensor::Shape
{
public:
+ /**
+ * @brief Construct a new Shape object for operand with param.
+ * @param [in] rank The rank value of shape
+ */
Shape(uint32_t rank);
public:
+ /**
+ * @brief Get dimension value of tensor as vector
+ * @return Dimension value(int32_t) of tensor as vector
+ */
int32_t asVector(void) const;
- nnfw::util::feature::Shape asFeature(void) const;
- nnfw::util::matrix::Shape asMatrix(void) const;
- nnfw::util::kernel::Shape asKernel(void) const;
- nnfw::util::tensor::Shape asTensor(void) const;
+ /**
+ * @brief Get dimension values of tensor as feature::Shape
+ * @return Dimension values of tensor as feature::Shape
+ */
+ nnfw::misc::feature::Shape asFeature(void) const;
+ /**
+ * @brief Get dimension values of tensor as matrix::Shape
+ * @return Dimension values of tensor as matrix::Shape
+ */
+ nnfw::misc::matrix::Shape asMatrix(void) const;
+ /**
+ * @brief Get dimension values of tensor as kernel::Shape
+ * @return Dimension values of tensor as kernel::Shape
+ */
+ nnfw::misc::kernel::Shape asKernel(void) const;
+ /**
+ * @brief Get dimension values of tensor::Shape
+ * @return Dimension values of tensor::Shape
+ */
+ nnfw::misc::tensor::Shape asTensor(void) const;
public:
+ /**
+ * @brief Extend rank of Shape object for operand with param.
+ * @param [in] to_rank The rank value to be extended to
+ * @return N/A
+ */
void extendRank(size_t);
};
@@ -87,27 +136,60 @@ namespace tflite
namespace operand
{
+/**
+ * @brief Class to have data of operand.
+ */
struct Data
{
+ /**
+ * @brief Destruct this object
+ */
virtual ~Data() = default;
+ /**
+ * @brief Get size of data
+ * @return size of data
+ */
virtual size_t size(void) const = 0;
+ /**
+ * @brief Get the base address of data
+ * @return the base address of data
+ */
virtual const uint8_t *base(void) const = 0;
};
+/**
+ * @brief Class to have cached data of operand.
+ */
class CachedData final : public Data
{
public:
+ /**
+ * @brief Construct a new CachedData object for operand with param.
+ * @param [in] base the base address of data
+ * @param [in] size the size of data
+ */
CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size}
{
std::copy(base, base + size, _base);
}
public:
+ /**
+ * @brief Destruct this object
+ */
~CachedData() { delete[] _base; }
public:
+ /**
+ * @brief Get size of data
+ * @return size of data
+ */
size_t size(void) const override { return _size; }
+ /**
+ * @brief Get the base address of data
+ * @return the base address of data
+ */
const uint8_t *base(void) const override { return _base; }
private:
@@ -115,16 +197,32 @@ private:
size_t _size;
};
+/**
+ * @brief Class to have external data of operand.
+ */
class ExternalData final : public Data
{
public:
+ /**
+ * @brief Construct a new ExternalData object for operand with param.
+ * @param [in] base the base address of data
+ * @param [in] size the size of data
+ */
ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get size of data
+ * @return size of data
+ */
size_t size(void) const override { return _size; }
+ /**
+ * @brief Get the base address of data
+ * @return the base address of data
+ */
const uint8_t *base(void) const override { return _base; }
private:
@@ -148,9 +246,19 @@ namespace tflite
namespace operand
{
+/**
+ * @brief Class to express operand as object.
+ */
class Object
{
public:
+ /**
+ * @brief Construct a new Object object for operand with param.
+ * @param [in] shape shape of operand
+ * @param [in] type type of operand
+ * @param [in] scale scale of operand
+ * @param [in] zeroPoint zeroPoint of operand
+ */
explicit Object(const Shape &shape, const int32_t type, const float scale,
const int32_t zeroPoint)
: _shape{shape}, _type{type}, _scale{scale}, _zeroPoint{zeroPoint}
@@ -159,25 +267,58 @@ public:
}
public:
+ /**
+ * @brief Get shape of operand
+ * @return Reference of shape of operand
+ */
const Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get type of operand
+ * @return type of operand
+ */
const int32_t type(void) const { return _type; }
+ /**
+ * @brief Get scale of operand
+ * @return scale of operand
+ */
const float scale(void) const { return _scale; }
+ /**
+ * @brief Get zeroPoint of operand
+ * @return zeroPoint of operand
+ */
const int32_t zeroPoint(void) const { return _zeroPoint; }
private:
void data(std::unique_ptr<Data> &&data) { _data = std::move(data); }
public:
+ /**
+ * @brief Get data of operand
+ * @return Reference of data of operand
+ */
const Data &data(void) const { return *_data; }
+ /**
+ * @brief Check whether this Object has data
+ * @return @c true if Object has data, otherwise @c false
+ */
bool hasData(void) const { return _data != nullptr; }
public:
+ /**
+ * @brief Set data of operand with param
+ * @param [in] args arguments of data to be set
+ * @return N/A
+ */
template <typename T, typename... Args> void data(Args &&... args)
{
data(std::unique_ptr<T>(new T{std::forward<Args>(args)...}));
}
public:
+ /**
+ * @brief Get value of data as scalar
+ * @return value of data as scalar
+ */
template <typename T> T asScalar(void) const
{
assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1)));
@@ -188,6 +329,11 @@ public:
}
public:
+ /**
+ * @brief Get value of data with its bits reordered
+ * @param [in] numOfBits The number of bits to reorder
+ * @return Value of data with reordered bits
+ */
template <typename T> T asReorderBits(size_t numOfBits) const
{
assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1)));
@@ -209,8 +355,6 @@ private:
} // namespace tflite
} // namespace internal
-#include <memory>
-
namespace internal
{
namespace tflite
@@ -218,9 +362,17 @@ namespace tflite
namespace operand
{
+/**
+ * @brief Class to have object instances in a kind of set
+ */
class Set
{
public:
+ /**
+ * @brief Iterate over operand indices with fn
+ * @param [in] fn Function to be called for each Index
+ * @return N/A
+ */
void iterate(const std::function<void(const Index &)> &fn)
{
for (uint32_t n = 0; n < _objects.size(); ++n)
@@ -231,12 +383,35 @@ public:
}
public:
+ /**
+ * @brief Append Object for operand with param
+ * @param [in] shape shape of operand
+ * @param [in] type type of operand
+ * @param [in] scale scale of operand
+ * @param [in] zeroPoint zeroPoint of operand
+ * @return Index at which the Object has been appended
+ */
Index append(const Shape &, int32_t type, float scale, int32_t zeroPoint);
public:
+ /**
+ * @brief Get Object at Index
+ * @param [in] index Index of the Object
+ * @return Const reference of Object
+ */
const Object &at(const Index &) const;
+ /**
+ * @brief Get Object at Index
+ * @param [in] index Index of the Object
+ * @return Reference of Object
+ */
Object &at(const Index &);
+ /**
+ * @brief Get size of operands in Set
+ * @return Value of size
+ */
size_t size(void) const { return _objects.size(); }
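+ /**
+ * @brief Check whether an Object exists at Index
+ * @param [in] index Index to check
+ * @return @c true if an Object exists at index, otherwise @c false
+ */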
+ bool exist(const Index &) const;
private:
std::vector<std::unique_ptr<Object>> _objects;
@@ -255,16 +430,36 @@ namespace tflite
namespace op
{
+/**
+ * @brief Class to hold a sequence of operators
+ */
class Sequence
{
public:
+ /**
+ * @brief Construct a new Sequence object (default constructor)
+ */
Sequence() = default;
public:
+ /**
+ * @brief Get size of operators in Sequence
+ * @return Value of size
+ */
uint32_t size(void) const { return _ops.size(); }
public:
+ /**
+ * @brief Get op::Node at Index
+ * @param [in] nth Index of the node
+ * @return Reference of op::Node
+ */
op::Node &at(uint32_t nth) { return *(_ops.at(nth)); }
+ /**
+ * @brief Get op::Node at Index
+ * @param [in] nth Index of the node
+ * @return Const reference of op::Node
+ */
const op::Node &at(uint32_t nth) const { return *(_ops.at(nth)); }
private:
@@ -275,6 +470,11 @@ private:
}
public:
+ /**
+ * @brief Construct and append an op::Node
+ * @param [in] args Arguments for the op::Node constructor
+ * @return Reference to this Sequence
+ */
template <typename T, typename... Args> Sequence &emplace_back(Args &&... args)
{
return emplace_back(std::unique_ptr<T>(new T{std::forward<Args>(args)...}));
@@ -293,14 +493,33 @@ namespace internal
namespace tflite
{
+/**
+ * @brief Class to have operand::Set as operands and op::Sequence as operators
+ */
class Model
{
public:
+ /**
+ * @brief Get operand::Set
+ * @return Reference of operand::Set
+ */
operand::Set &operands(void) { return _operands; }
+ /**
+ * @brief Get operand::Set
+ * @return Const reference of operand::Set
+ */
const operand::Set &operands(void) const { return _operands; }
public:
+ /**
+ * @brief Get op::Sequence
+ * @return Reference of op::Sequence
+ */
op::Sequence &operations(void) { return _operations; }
+ /**
+ * @brief Get op::Sequence
+ * @return Const reference of op::Sequence
+ */
const op::Sequence &operations(void) const { return _operations; }
private:
@@ -309,8 +528,8 @@ private:
public:
// TODO Hide these fields
- std::vector<operand::Index> inputs;
- std::vector<operand::Index> outputs;
+ std::vector<operand::Index> inputs; /**< indexes of operand as input */
+ std::vector<operand::Index> outputs; /**< indexes of operand as output */
};
} // namespace tflite
diff --git a/runtimes/pure_arm_compute/src/internal/Sink.h b/runtimes/pure_arm_compute/src/internal/Sink.h
index af3b37e61..6f44561ea 100644
--- a/runtimes/pure_arm_compute/src/internal/Sink.h
+++ b/runtimes/pure_arm_compute/src/internal/Sink.h
@@ -14,15 +14,31 @@
* limitations under the License.
*/
+/**
+ * @file Sink.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Sink struct
+ */
#ifndef __INTERNAL_SINK_H__
#define __INTERNAL_SINK_H__
#include <arm_compute/core/ITensor.h>
+/**
+ * @brief Struct to get tensor data from arm compute tensor (abstract)
+ */
struct Sink
{
+ /**
+ * @brief Destroy the Sink object
+ */
virtual ~Sink() = default;
+ /**
+ * @brief Get tensor data from arm compute tensor
+ * @param[in] tensor Tensor object of arm compute to get data
+ * @return N/A
+ */
virtual void pull(::arm_compute::ITensor &tensor) const = 0;
};
diff --git a/runtimes/pure_arm_compute/src/internal/Sinks.h b/runtimes/pure_arm_compute/src/internal/Sinks.h
index e8a7d5966..7317c67c1 100644
--- a/runtimes/pure_arm_compute/src/internal/Sinks.h
+++ b/runtimes/pure_arm_compute/src/internal/Sinks.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        Sinks.h
+ * @brief       This file contains TensorSink class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_SINKS_H__
#define __INTERNAL_SINKS_H__
@@ -28,29 +34,46 @@
#include "internal/nnapi/tensor/View.h"
#include "internal/arm_compute/tensor/View.h"
-#include "util/tensor/IndexIterator.h"
+#include "misc/tensor/IndexIterator.h"
+/**
+ * @brief Class to store NN model output data for general-shaped tensors.
+ * This is for pulling data from an arm compute tensor into the client buffer.
+ * @tparam T Type of the data elements
+ */
template <typename T> class TensorSink final : public Sink
{
public:
- TensorSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size)
+ /**
+ * @brief Construct a TensorSink object
+ *
+ * @param[in] shape general-shaped tensor dimensions
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ TensorSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Pull the data into the internal structure
+ * @param[in] tensor The tensor which contains source data
+ * @return N/A
+ */
void pull(::arm_compute::ITensor &tensor) const override
{
const ::internal::arm_compute::tensor::View<T> from{&tensor};
::internal::nnapi::tensor::View<T> into{_shape, _base, _size};
- using ::nnfw::util::tensor::iterate;
- using ::nnfw::util::tensor::Index;
+ using ::nnfw::misc::tensor::iterate;
+ using ::nnfw::misc::tensor::Index;
const uint32_t rank = _shape.rank();
- ::nnfw::util::tensor::iterate(_shape) << [&](const Index &raw) {
+ ::nnfw::misc::tensor::iterate(_shape) << [&](const Index &raw) {
Index permuted(raw.rank());
for (uint32_t axis = 0; axis < rank; ++axis)
@@ -64,7 +87,7 @@ public:
}
private:
- const nnfw::util::tensor::Shape _shape;
+ const nnfw::misc::tensor::Shape _shape;
private:
T *const _base;
diff --git a/runtimes/pure_arm_compute/src/internal/Source.h b/runtimes/pure_arm_compute/src/internal/Source.h
index a159e5092..fa8f1e811 100644
--- a/runtimes/pure_arm_compute/src/internal/Source.h
+++ b/runtimes/pure_arm_compute/src/internal/Source.h
@@ -14,15 +14,32 @@
* limitations under the License.
*/
+/**
+ * @file Source.h
+ * @brief This file contains Source struct for pushing ITensor
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_SOURCE_H__
#define __INTERNAL_SOURCE_H__
#include <arm_compute/core/ITensor.h>
+/**
+ * @brief Struct to push data from an internal source to ITensor.
+ */
struct Source
{
+ /**
+ * @brief Destructor as default
+ */
virtual ~Source() = default;
+ /**
+ * @brief Push the internal source data to ITensor
+ * @param [in] tensor ITensor to be pushed into
+ * @return N/A
+ */
virtual void push(::arm_compute::ITensor &tensor) const = 0;
};
diff --git a/runtimes/pure_arm_compute/src/internal/Swizzle.h b/runtimes/pure_arm_compute/src/internal/Swizzle.h
index 66bf7aef6..f127b8a3b 100644
--- a/runtimes/pure_arm_compute/src/internal/Swizzle.h
+++ b/runtimes/pure_arm_compute/src/internal/Swizzle.h
@@ -14,28 +14,53 @@
* limitations under the License.
*/
+/**
+ * @file Swizzle.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines ARMComputeAxis class and utility functions to support mapping
+ * between arm compute axis and NNAPI axis
+ */
#ifndef __SWIZZLE_H__
#define __SWIZZLE_H__
+/**
+ * @brief Class to represent arm compute axis
+ */
class ARMComputeAxis
{
public:
+ /**
+ * @brief Construct a new ARMComputeAxis object
+ */
ARMComputeAxis() = default;
public:
+ /**
+ * @brief Construct a new ARMComputeAxis object
+ * @param[in] value Raw axis number
+ */
explicit ARMComputeAxis(uint32_t value) : _value{value}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get raw axis number
+ * @return Raw axis number
+ */
uint32_t value(void) const { return _value; }
private:
uint32_t _value;
};
-// Convert T/F Lite / NNAPI axis (based on ...NHWC) to ARMCompute axis (WHCN...)
+/**
+ * @brief Convert T/F Lite / NNAPI axis (based on ...NHWC) to arm compute axis (WHCN...)
+ * @param[in] rank Rank of shape
+ * @param[in] axis Axis to map
+ * @return ARMComputeAxis including arm compute axis info
+ */
inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis)
{
assert(rank > axis);
@@ -68,6 +93,12 @@ inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis)
#include <cassert>
+/**
+ * @brief Convert bitmask from NNAPI axis order to arm compute axis order
+ * @param[in] in Bitmask data
+ * @param[in] numOfBits Used bits (rank)
+ * @return Converted bitmask
+ */
template <typename T> inline T ReorderBits(T in, size_t numOfBits)
{
assert(numOfBits > 0);
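
To make the axis mapping concrete: for rank 4, NNAPI's NHWC axes land in ARM Compute's WHCN coordinate order, i.e. N->3, H->1, W->0, C->2; for lower ranks the mapping is a plain reversal. The self-contained sketch below mirrors that documented behavior (it is not the exact body of ToARMComputeAxis, which this hunk elides):

    // Editor's sketch of the documented NNAPI -> ARM Compute axis mapping.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    uint32_t to_acl_axis(uint32_t rank, uint32_t axis)
    {
      assert(rank > axis);

      if (rank == 4)
      {
        static const uint32_t nhwc_to_whcn[4] = {3, 1, 0, 2}; // N->3, H->1, W->0, C->2
        return nhwc_to_whcn[axis];
      }

      return rank - axis - 1; // plain reversal for rank < 4 (e.g. CHW -> WHC)
    }

    int main(void)
    {
      printf("N->%u H->%u W->%u C->%u\n", to_acl_axis(4, 0), to_acl_axis(4, 1),
             to_acl_axis(4, 2), to_acl_axis(4, 3)); // prints: N->3 H->1 W->0 C->2
      return 0;
    }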
diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h
index 20de3b9e8..1e14e2d6c 100644
--- a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h
+++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file Tensor3DSink.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Tensor3DSink class
+ */
#ifndef __TENSOR3D_SINK_H__
#define __TENSOR3D_SINK_H__
@@ -26,16 +31,30 @@
#include <arm_compute/core/Window.h>
#include <arm_compute/core/Helpers.h>
+/**
+ * @brief Class to get tensor data from arm compute tensor
+ */
template <typename T> class Tensor3DSink final : public Sink
{
public:
- Tensor3DSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size)
+ /**
+ * @brief Construct a new Tensor3DSink object
+ * @param[in] shape Shape of tensor
+ * @param[in] base Pointer to the buffer which receives the data
+ * @param[in] size Size of the buffer, in bytes
+ */
+ Tensor3DSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Copy tensor data from the arm compute tensor into the base buffer
+ * @param[in] tensor Tensor object of arm compute to get data
+ * @return N/A
+ */
void pull(::arm_compute::ITensor &tensor) const override
{
using ::arm_compute::Window;
@@ -60,7 +79,7 @@ public:
}
private:
- const nnfw::util::tensor::Shape _shape;
+ const nnfw::misc::tensor::Shape _shape;
private:
T *const _base;
diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h
index c100bbdd2..3d8d1b958 100644
--- a/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h
+++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file Tensor3DSource.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Tensor3DSource class
+ */
#ifndef __TENSOR3D_SOURCE_H__
#define __TENSOR3D_SOURCE_H__
@@ -26,16 +31,30 @@
#include <arm_compute/core/Window.h>
#include <arm_compute/core/Helpers.h>
+/**
+ * @brief Class to push tensor data to arm compute tensor
+ */
template <typename T> class Tensor3DSource final : public Source
{
public:
- Tensor3DSource(const nnfw::util::tensor::Shape &shape, const T *base, const size_t size)
+ /**
+ * @brief Construct a new Tensor3DSource object
+ * @param[in] shape Shape of tensor
+ * @param[in] base Pointer of tensor data to push
+ * @param[in] size Size of tensor
+ */
+ Tensor3DSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Push tensor data to arm compute tensor
+ * @param[out] tensor Tensor object of arm compute to push tensor data
+ * @return N/A
+ */
void push(::arm_compute::ITensor &tensor) const override
{
using ::arm_compute::Window;
@@ -60,7 +79,7 @@ public:
}
private:
- const nnfw::util::tensor::Shape _shape;
+ const nnfw::misc::tensor::Shape _shape;
private:
const T *const _base;
diff --git a/runtimes/pure_arm_compute/src/internal/TensorSource.h b/runtimes/pure_arm_compute/src/internal/TensorSource.h
index 0ddc44855..114d3588e 100644
--- a/runtimes/pure_arm_compute/src/internal/TensorSource.h
+++ b/runtimes/pure_arm_compute/src/internal/TensorSource.h
@@ -14,11 +14,17 @@
* limitations under the License.
*/
+/**
+ * @file TensorSource.h
+ * @brief This file contains TensorSource class which is inherited from Source class
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_TENSOR_SOURCE_H__
#define __INTERNAL_TENSOR_SOURCE_H__
-#include <util/tensor/Shape.h>
-#include <util/tensor/IndexIterator.h>
+#include <misc/tensor/Shape.h>
+#include <misc/tensor/IndexIterator.h>
#include "internal/Source.h"
#include "internal/Swizzle.h"
@@ -26,24 +32,38 @@
#include "internal/arm_compute/tensor/View.h"
// NOTE TensorSource is much slower than specialized Source(s)
+/**
+ * @brief Class to push general-shaped tensor data to an arm compute tensor
+ */
template <typename T> class TensorSource final : public Source
{
public:
- TensorSource(const nnfw::util::tensor::Shape &shape, const T *base, const size_t size)
+ /**
+ * @brief Construct a new TensorSource object with params
+ * @param [in] shape Shape of tensor
+ * @param [in] base Base address
+ * @param [in] size Size of tensor
+ */
+ TensorSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size)
: _shape{shape}, _base{base}, _size{size}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Push tensor data to the arm compute tensor
+ * @param [in] tensor Tensor to be pushed
+ * @return N/A
+ */
void push(::arm_compute::ITensor &tensor) const override
{
const ::internal::nnapi::tensor::Reader<T> from{_shape, _base, _size};
::internal::arm_compute::tensor::View<T> into{&tensor};
- ::nnfw::util::tensor::iterate(_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) {
+ ::nnfw::misc::tensor::iterate(_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) {
const auto rank = index_nnapi.rank();
- nnfw::util::tensor::Index index_ACL(rank);
+ nnfw::misc::tensor::Index index_ACL(rank);
for (uint32_t axis = 0; axis < rank; ++axis)
{
@@ -55,7 +75,7 @@ public:
}
private:
- const nnfw::util::tensor::Shape _shape;
+ const nnfw::misc::tensor::Shape _shape;
const T *const _base;
const size_t _size;
};
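
The NOTE above matters in practice: the runtime prefers the specialized sources and falls back to TensorSource only for general shapes. A hedged sketch of such a dispatch follows; the real selection logic lives in execution.cc and the helper name is illustrative.

    // Editor's sketch (hypothetical helper): pick a Source implementation by rank.
    #include "internal/TensorSource.h"
    #include "internal/VectorSource.h"

    #include <memory>

    template <typename T>
    std::unique_ptr<Source> make_source(const nnfw::misc::tensor::Shape &shape, const T *base,
                                        const size_t size)
    {
      if (shape.rank() == 1)
      {
        // Fast path: contiguous copy of a vector
        return std::unique_ptr<Source>{new VectorSource<T>(shape.dim(0), base, size)};
      }

      // Generic (and much slower) element-wise permuting copy
      return std::unique_ptr<Source>{new TensorSource<T>(shape, base, size)};
    }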
diff --git a/runtimes/pure_arm_compute/src/internal/VectorSink.h b/runtimes/pure_arm_compute/src/internal/VectorSink.h
index d1bf962e2..a630ef1c1 100644
--- a/runtimes/pure_arm_compute/src/internal/VectorSink.h
+++ b/runtimes/pure_arm_compute/src/internal/VectorSink.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        VectorSink.h
+ * @brief       This file contains VectorSink class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_VECTOR_SINK_H__
#define __INTERNAL_VECTOR_SINK_H__
@@ -23,18 +29,31 @@
#include <cassert>
-//
-// VectorSink
-//
+/**
+ * @brief Class to store vector (1D) output data.
+ * This is for pulling the data out of an arm compute tensor.
+ * @tparam T Type of the data elements
+ */
template <typename T> class VectorSink final : public Sink
{
public:
+ /**
+ * @brief Construct a VectorSink object
+ * @param[in] vlen Length of the vector
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
VectorSink(const int32_t vlen, T *base, const size_t size) : _vlen{vlen}, _base{base}
{
assert(size >= _vlen * sizeof(T));
}
public:
+ /**
+ * @brief Pull the data into the internal structure
+ * @param[in] tensor The tensor which contains source data
+ * @return N/A
+ */
void pull(::arm_compute::ITensor &tensor) const override
{
for (int32_t n = 0; n < _vlen; ++n)
diff --git a/runtimes/pure_arm_compute/src/internal/VectorSource.h b/runtimes/pure_arm_compute/src/internal/VectorSource.h
index 41aab07e4..48d3d3209 100644
--- a/runtimes/pure_arm_compute/src/internal/VectorSource.h
+++ b/runtimes/pure_arm_compute/src/internal/VectorSource.h
@@ -14,20 +14,42 @@
* limitations under the License.
*/
+/**
+ * @file        VectorSource.h
+ * @brief       This file contains VectorSource class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_VECTOR_SOURCE_H__
#define __INTERNAL_VECTOR_SOURCE_H__
#include "internal/Source.h"
+/**
+ * @brief Class to store vector (1D) input data.
+ * This is for pushing the data to another tensor.
+ * @tparam T Type of the data elements
+ */
template <typename T> class VectorSource final : public Source
{
public:
+ /**
+ * @brief Construct a VectorSource object
+ * @param[in] vlen Length of the vector
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
VectorSource(const int32_t vlen, const T *base, const size_t size) : _vlen{vlen}, _base{base}
{
assert(size >= _vlen * sizeof(T));
}
public:
+ /**
+ * @brief Push the data out to another tensor
+ * @param[out] tensor The tensor in which the output data will be stored
+ * @return N/A
+ */
void push(::arm_compute::ITensor &tensor) const override
{
for (int32_t n = 0; n < _vlen; ++n)
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute.h b/runtimes/pure_arm_compute/src/internal/arm_compute.h
index ef43ed45c..fb6acaf81 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        arm_compute.h
+ * @brief       This file contains arm_compute library related classes
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_ARM_COMPUTE_H__
#define __INTERNAL_ARM_COMPUTE_H__
@@ -28,6 +34,9 @@ namespace arm_compute
namespace operand
{
+/**
+ * @brief Class to access the tensor object
+ */
class Object
{
public:
@@ -40,12 +49,21 @@ public:
}
public:
+ /**
+ * @brief Get the tensor pointer
+ * @return The tensor pointer
+ */
::arm_compute::ITensor *ptr(void) const { return _tensor.get(); }
private:
std::shared_ptr<::arm_compute::ITensor> _tensor;
public:
+ /**
+ * @brief Access the tensor object and run the given function
+ * @param[in] fn The actual behavior when accessing the tensor object
+ * @return N/A
+ */
void access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const;
};
@@ -64,24 +82,48 @@ namespace arm_compute
namespace operand
{
+/**
+ * @brief Class to manage Object instances
+ */
class Context
{
public:
+ /**
+ * @brief Set index and tensor pair
+ * @param[in] ind The operand index
+ * @param[in] tensor The tensor object
+ * @return This object reference
+ */
Context &set(const ::internal::tflite::operand::Index &ind,
const std::shared_ptr<::arm_compute::ITensor> &tensor);
public:
+ /**
+ * @brief Check whether the tensor for the given index exists
+ * @param[in] ind The operand Index
+ * @return @c true if the entry for ind exists, otherwise @c false
+ */
bool exist(const ::internal::tflite::operand::Index &ind) const
{
return _objects.find(ind.asInt()) != _objects.end();
}
public:
+ /**
+ * @brief Lookup the tensor with the given index
+ * @param[in] ind The index as the key
+ * @return The object const reference
+ */
const Object &at(const ::internal::tflite::operand::Index &ind) const
{
return _objects.at(ind.asInt());
}
+ /**
+ * @brief Lookup the tensor with the given index
+ * @param[in] ind The index as the key
+ * @return The object reference
+ */
Object &at(const ::internal::tflite::operand::Index &ind) { return _objects.at(ind.asInt()); }
private:
@@ -101,19 +143,38 @@ namespace arm_compute
namespace op
{
+/**
+ * @brief Class to wrap IFunction
+ */
class Step
{
public:
+ /**
+ * @brief Construct a Step object
+ * @param[in] func The compiled code to be executed
+ */
Step(std::unique_ptr<::arm_compute::IFunction> &&func) : _func{std::move(func)}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Run _func
+ * @return N/A
+ */
void run(void) const { _func->run(); }
public:
+ /**
+ * @brief Get member @c _name
+ * @return The name as const reference
+ */
const std::string &name(void) const { return _name; }
+ /**
+ * @brief Get member @c _name
+ * @return The name as reference
+ */
std::string &name(void) { return _name; }
private:
@@ -121,7 +182,15 @@ private:
std::unique_ptr<::arm_compute::IFunction> _func;
#ifdef TFLITE_PROFILING_ENABLED
public:
+ /**
+ * @brief Get member @c _op_index
+ * @return The operation index as value
+ */
int op_idx() const { return _op_idx; }
+ /**
+ * @brief Get member @c _op_index
+ * @return The operation index as reference
+ */
int &op_idx() { return _op_idx; }
private:
int _op_idx;
@@ -139,12 +208,24 @@ namespace arm_compute
namespace op
{
+/**
+ * @brief Class managing compiled operation code Sequence
+ */
class Sequence
{
public:
+ /**
+ * @brief Get size of sequence
+ * @return Number of sequence steps
+ */
uint32_t size(void) const { return _functions.size(); }
public:
+ /**
+ * @brief Append a Function to the sequence
+ * @param[in] func Function to be appended
+ * @return This object reference
+ */
Sequence &append(std::unique_ptr<::arm_compute::IFunction> &&func)
{
_functions.emplace_back(std::move(func));
@@ -152,7 +233,17 @@ public:
}
public:
+ /**
+ * @brief Get the step entry on the index @c n
+ * @param[in] n The index
+ * @return The step object as reference
+ */
Step &at(uint32_t n) { return _functions.at(n); }
+ /**
+ * @brief Get the step entry on the index @c n
+ * @param[in] n The index
+ * @return The step object as const reference
+ */
const Step &at(uint32_t n) const { return _functions.at(n); }
private:
@@ -169,23 +260,50 @@ namespace internal
namespace arm_compute
{
+/**
+ * @brief Class to manage compiled operation sequence
+ */
class Plan
{
public:
+ /**
+ * @brief Construct a Plan object
+ * @param[in] model Model that we want to compile
+ */
Plan(const std::shared_ptr<const ::internal::tflite::Model> &model) : _model(model)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get the model object
+ * @return The model object as const reference
+ */
const ::internal::tflite::Model &model(void) const { return *_model; }
public:
+ /**
+ * @brief Get operand context
+ * @return The operand context as reference
+ */
operand::Context &operands(void) { return _operands; }
+ /**
+ * @brief Get operand context
+ * @return The operand context as const reference
+ */
const operand::Context &operands(void) const { return _operands; }
public:
+ /**
+ * @brief Get operation sequence
+ * @return The operation sequence as reference
+ */
op::Sequence &operations(void) { return _ops; }
+ /**
+ * @brief Get operation sequence
+ * @return The operation sequence as const reference
+ */
const op::Sequence &operations(void) const { return _ops; }
private:
@@ -204,7 +322,10 @@ namespace internal
namespace arm_compute
{
-// check if this runtime runs on GPU or NEON
+/**
+ * @brief Check if this runtime runs on GPU or NEON
+ * @return @c true if GPU mode, otherwise @c false
+ */
bool isGpuMode();
#define CAST_CL(tensor) static_cast<::arm_compute::CLTensor *>(tensor)
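
Putting the classes above together, here is a minimal sketch of how a Plan is populated and executed; tensor and function construction are elided, and the real flow lives in compilation.cc and execution.cc.

    // Editor's sketch: bind an operand tensor, queue one compiled step, then run all steps.
    #include "internal/arm_compute.h"

    #include <memory>

    void run_plan(::internal::arm_compute::Plan &plan,
                  const ::internal::tflite::operand::Index &index,
                  const std::shared_ptr<::arm_compute::ITensor> &tensor,
                  std::unique_ptr<::arm_compute::IFunction> func)
    {
      plan.operands().set(index, tensor);        // operand Index -> ITensor binding
      plan.operations().append(std::move(func)); // queue one compiled step

      for (uint32_t n = 0; n < plan.operations().size(); ++n)
      {
        plan.operations().at(n).run(); // execute in append order
      }
    }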
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc
new file mode 100644
index 000000000..ff2f79309
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/arm_compute/Cast.h"
+
+#include "internal/Swizzle.h"
+
+::arm_compute::Coordinates getARMComputeAxises(uint32_t rank)
+{
+ ::arm_compute::Coordinates res{};
+
+ res.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.set(axis, ToARMComputeAxis(rank, axis).value());
+ }
+
+ return res;
+}
+
+::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord,
+ const ::arm_compute::Coordinates &axises)
+{
+ ::arm_compute::Coordinates id{};
+ assert(runtime_coord.num_dimensions() == axises.num_dimensions());
+ for (size_t i = 0; i < runtime_coord.num_dimensions(); ++i)
+ {
+ id.set(axises[i], runtime_coord[i]);
+ }
+ return id;
+}
+
+// Restructure runtime_permutationVector to ACL_permutationVector
+::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank,
+ const int32_t *runtime_pv)
+{
+ // rank up to 4 is supported
+ assert(rank <= 4);
+ assert(runtime_pv != nullptr);
+
+ int new_pv[4] = {0};
+ ::arm_compute::Coordinates axises = getARMComputeAxises(rank);
+
+ if (rank == 4)
+ {
+ /**
+ axises = {3,1,0,2}
+ NNAPI PermutationVector
+ N 0 3
+ H 1 1
+ W 2 0
+ C 3 2
+ **/
+ new_pv[0] = axises[runtime_pv[2]];
+ new_pv[1] = axises[runtime_pv[1]];
+ new_pv[2] = axises[runtime_pv[3]];
+ new_pv[3] = axises[runtime_pv[0]];
+ }
+ else
+ {
+ /**
+ mapping/axises = {rank-1 to 0}
+ CHW --------> WHC
+ or
+ WH ----------> HW
+ **/
+ for (uint32_t id = 0; id < rank; ++id)
+ {
+ new_pv[id] = axises[runtime_pv[rank - id - 1]];
+ }
+ }
+
+ return ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
+}
+
+::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape,
+ bool apply_dim_correction)
+{
+ const uint32_t rank = shape.rank();
+
+ ::arm_compute::TensorShape res{};
+
+ res.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ // NOTE In some cases, an "incorrect" dimension setting is required on purpose.
+ // For example, input_size is 1 in LSTM. The input-to-input weights([num_units, input_size])
+ // of LSTM are used as the weight of a FullyConnected layer.
+ // The FullyConnected's weight must be at least 2-dimensional.
+ // However, if the dimension correction is applied to input_to_input_weights with input_size
+ // equal to 1, it is collapsed to 1-D.
+ // So the correction must be disabled for input_to_input_weights to be usable as the
+ // FullyConnected weight.
+ res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction);
+ }
+
+ return res;
+}
+
+::arm_compute::DataType asDataType(const int32_t type)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ return ::arm_compute::DataType::F32;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ return ::arm_compute::DataType::S32;
+ case ANEURALNETWORKS_UINT32:
+ return ::arm_compute::DataType::U32;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code)
+{
+ switch (code)
+ {
+ case ANEURALNETWORKS_FUSED_NONE:
+ return ::arm_compute::ActivationLayerInfo{};
+ case ANEURALNETWORKS_FUSED_RELU:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ case ANEURALNETWORKS_FUSED_RELU1:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ case ANEURALNETWORKS_FUSED_RELU6:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset)
+{
+ return ::arm_compute::QuantizationInfo(scale, offset);
+}
+
+::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type,
+ const float scale, const int32_t zeroPoint)
+{
+ return ::arm_compute::TensorInfo(shape, 1, asDataType(type),
+ asQuantizationInfo(scale, zeroPoint));
+}
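
For reference, a short sketch of how these casts combine when describing an ACL tensor for a quantized NNAPI operand; the scale and zero-point values below are illustrative only.

    // Editor's sketch: internal operand description -> arm_compute::TensorInfo.
    #include "internal/arm_compute/Cast.h"

    ::arm_compute::TensorInfo make_qasymm8_info(const internal::tflite::operand::Shape &shape)
    {
      // NHWC dims are swizzled into ACL's WHCN order by asTensorShape
      const auto acl_shape = asTensorShape(shape);

      return asTensorInfo(acl_shape, ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, 0.5f, 128);
    }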
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h
index e2ceb8fef..42b547feb 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h
@@ -14,104 +14,98 @@
* limitations under the License.
*/
+/**
+ * @file Cast.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines casting functions from internal object to arm compute object
+ */
#ifndef __ARM_COMPUTE_CAST_H__
+#define __ARM_COMPUTE_CAST_H__
+#include <arm_compute/core/Coordinates.h>
+#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/Types.h>
-#include "internal/Swizzle.h"
-#include "internal/Model.h"
-
-inline ::arm_compute::Coordinates getARMComputeAxises(uint32_t rank)
-{
- ::arm_compute::Coordinates res{};
-
- res.set_num_dimensions(rank);
-
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- res.set(axis, ToARMComputeAxis(rank, axis).value());
- }
-
- return res;
-}
-
-inline ::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape,
- bool apply_dim_correction = true)
-{
- const uint32_t rank = shape.rank();
+#include <NeuralNetworks.h>
- ::arm_compute::TensorShape res{};
-
- res.set_num_dimensions(rank);
-
- for (uint32_t axis = 0; axis < rank; ++axis)
- {
- // NOTE In some cases, in incorrect dimensions is required.
- // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of
- // LSTM is used as the weight of the FullyConnected.
- // The FullyConnected's weight must be greater or equal than 2-dimensions.
- // However, if the dimension correction is applied to input_to_input_weights with input_size
- // equal to 1, it will be changed to 1-D.
- // So input_to_input_weights is not used by the weight of FullyConnected.
- res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction);
- }
-
- return res;
-}
+#include "internal/Model.h"
-::arm_compute::DataType asDataType(const int32_t type)
-{
- switch (type)
- {
- case ANEURALNETWORKS_FLOAT32:
- case ANEURALNETWORKS_TENSOR_FLOAT32:
- return ::arm_compute::DataType::F32;
- case ANEURALNETWORKS_INT32:
- case ANEURALNETWORKS_TENSOR_INT32:
- return ::arm_compute::DataType::S32;
- case ANEURALNETWORKS_UINT32:
- return ::arm_compute::DataType::U32;
- case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
- return ::arm_compute::DataType::QASYMM8;
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
+/**
+ * @brief Generate arm compute coordinate object from rank
+ * @param[in] rank Rank number
+ * @return Coordinate object
+ */
+::arm_compute::Coordinates getARMComputeAxises(uint32_t rank);
+
+/**
+ * @brief Generate arm compute coordinate object from runtime coordinate object
+ * @param[in] runtime_coord Runtime coordinates object
+ * @param[in] axises Coordinates of the axes used to map runtime coordinates to
+ * arm compute coordinates
+ * @return Arm_compute coordinate object
+ */
+::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord,
+ const ::arm_compute::Coordinates &axises);
+
+/**
+ * @brief Generate arm compute permutation vector from runtime permutation vector
+ * @param[in] rank Rank number, supported up to 4
+ * @param[in] runtime_pv Integer array for runtime permutation vector
+ * @return Permutation vector of arm compute
+ */
+::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank,
+ const int32_t *runtime_pv);
+/**
+ * @brief Cast from shape of internal to arm compute
+ * @param[in] shape Internal shape object
+ * @param[in] apply_dim_correction Flag to state whether to apply dimension correction after
+ * setting one dimension in arm compute
+ * @return TensorShape object of arm compute
+ */
+::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape,
+ bool apply_dim_correction = true);
-::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code)
-{
- switch (code)
- {
- case ANEURALNETWORKS_FUSED_NONE:
- return ::arm_compute::ActivationLayerInfo{};
- case ANEURALNETWORKS_FUSED_RELU:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- case ANEURALNETWORKS_FUSED_RELU1:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
- case ANEURALNETWORKS_FUSED_RELU6:
- return ::arm_compute::ActivationLayerInfo{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
-}
+/**
+ * @brief Cast from data type enum of NNAPI to arm compute
+ * @param[in] type NNAPI data type
+ * @return Data type of arm compute
+ */
+::arm_compute::DataType asDataType(const int32_t type);
-::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset)
-{
- return ::arm_compute::QuantizationInfo(scale, offset);
-}
+/**
+ * @brief Cast from NNAPI activation type enum to activation object of arm compute
+ * @param[in] code NNAPI activation type
+ * @return ActivationLayerInfo object of arm compute
+ */
+::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code);
+/**
+ * @brief Generate quantization info object of arm compute
+ * @param[in] scale Scale of quantization
+ * @param[in] offset Offset of quantization
+ * @return QuantizationInfo object of arm compute
+ */
+::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset);
+
+/**
+ * @brief Cast from internal tensor info to tensor info object of arm compute
+ * @param[in] shape Tensor shape
+ * @param[in] type Tensor type
+ * @param[in] scale Scale of tensor quantization
+ * @param[in] zeroPoint Zeropoint of tensor quantization
+ * @return TensorInfo object of arm compute
+ */
::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type,
- const float scale = 0.0f, const int32_t zeroPoint = 0)
-{
- return ::arm_compute::TensorInfo(shape, 1, asDataType(type),
- asQuantizationInfo(scale, zeroPoint));
-}
-
+ const float scale = 0.0f, const int32_t zeroPoint = 0);
+
+/**
+ * @brief Set value to arm compute tensor with casting
+ * @param[in] value Value to set
+ * @param[out] to Target tensor of arm compute
+ * @param[in] id Position of element
+ * @return N/A
+ */
template <typename FromT>
void copyCast(const FromT value, ::arm_compute::ITensor *to, const ::arm_compute::Coordinates &id)
{
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h
index 9d19021ae..c989ef4c2 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h
@@ -14,10 +14,15 @@
* limitations under the License.
*/
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::feature::View class
+ */
#ifndef __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__
#define __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__
-#include "util/feature/Reader.h"
+#include "misc/feature/Reader.h"
#include <arm_compute/core/ITensor.h>
@@ -28,15 +33,29 @@ namespace arm_compute
namespace feature
{
-template <typename T> class View final : public nnfw::util::feature::Reader<T>
+/**
+ * @brief Class to access feature's element
+ */
+template <typename T> class View final : public nnfw::misc::feature::Reader<T>
{
public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Feature to support access
+ */
View(::arm_compute::ITensor *tensor) : _tensor{tensor}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get value of element in 3D feature using channel, row and column
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t ch, uint32_t row, uint32_t col) const override
{
const auto offset = feature_index_to_byte_offset(ch, row, col);
@@ -46,6 +65,14 @@ public:
return *ptr;
}
+ /**
+ * @brief Get value of element in 4D feature using batch, channel, row and column
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
{
const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
@@ -56,6 +83,13 @@ public:
}
public:
+ /**
+ * @brief Get reference of element in 3D feature using channel, row and column
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
T &at(uint32_t ch, uint32_t row, uint32_t col)
{
const auto offset = feature_index_to_byte_offset(ch, row, col);
@@ -65,6 +99,14 @@ public:
return *ptr;
}
+ /**
+ * @brief Get reference of element in 4D feature using batch, channel, row and column
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
{
const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
@@ -75,12 +117,27 @@ public:
}
private:
+ /**
+ * @brief Get offset of element in 3D feature
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
size_t feature_index_to_byte_offset(uint32_t ch, uint32_t row, uint32_t col) const
{
// ARM Compute uses CHW ordering
return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch});
}
+ /**
+ * @brief Get offset of element in 4D feature
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
{
// ARM Compute uses CHW ordering
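
The CHW note is the crux of this View: ACL coordinates are listed innermost-first, so a (ch, row, col) index becomes Coordinates{col, row, ch}. A brief usage sketch, assuming an already-mapped ITensor:

    // Editor's sketch: read-modify-write one feature element through the View.
    #include "internal/arm_compute/feature/View.h"

    float bump_element(::arm_compute::ITensor *tensor, uint32_t ch, uint32_t row, uint32_t col)
    {
      ::internal::arm_compute::feature::View<float> view{tensor};

      // Both accessors resolve to offset_element_in_bytes(Coordinates{col, row, ch})
      view.at(ch, row, col) += 1.0f;
      return view.at(ch, row, col);
    }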
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h
index 28054d7c8..399cdf913 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::kernel::View class
+ */
#ifndef __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__
#define __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__
-#include "util/kernel/Shape.h"
-#include "util/kernel/Reader.h"
+#include "misc/kernel/Shape.h"
+#include "misc/kernel/Reader.h"
#include <arm_compute/core/ITensor.h>
@@ -29,15 +34,30 @@ namespace arm_compute
namespace kernel
{
-template <typename T> class View final : public nnfw::util::kernel::Reader<T>
+/**
+ * @brief Class to access kernel's element
+ */
+template <typename T> class View final : public nnfw::misc::kernel::Reader<T>
{
public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Kernel to support access
+ */
View(::arm_compute::ITensor *tensor) : _tensor{tensor}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get value of element in kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
{
const auto offset = kernel_index_to_byte_offset(nth, ch, row, col);
@@ -48,6 +68,14 @@ public:
}
public:
+ /**
+ * @brief Get reference of element in kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col)
{
const auto offset = kernel_index_to_byte_offset(nth, ch, row, col);
@@ -58,6 +86,14 @@ public:
}
private:
+ /**
+ * @brief Get offset of element in kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
size_t kernel_index_to_byte_offset(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const
{
return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch, nth});
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h
index e3534294f..305fff729 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::matrix::View class
+ */
#ifndef __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__
#define __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__
-#include "util/matrix/Shape.h"
-#include "util/matrix/Reader.h"
+#include "misc/matrix/Shape.h"
+#include "misc/matrix/Reader.h"
#include <arm_compute/core/ITensor.h>
@@ -29,15 +34,28 @@ namespace arm_compute
namespace matrix
{
-template <typename T> class View final : public nnfw::util::matrix::Reader<T>
+/**
+ * @brief Class to access matrix's element
+ */
+template <typename T> class View final : public nnfw::misc::matrix::Reader<T>
{
public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Matrix to support access
+ */
View(::arm_compute::ITensor *tensor) : _tensor{tensor}
{
// DO NOTHING
}
public:
+ /**
+ * @brief Get value of element in matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t row, uint32_t col) const override
{
const auto offset = matrix_index_to_byte_offset(row, col);
@@ -48,6 +66,12 @@ public:
}
public:
+ /**
+ * @brief Get reference of element in matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
T &at(uint32_t row, uint32_t col)
{
const auto offset = matrix_index_to_byte_offset(row, col);
@@ -58,6 +82,12 @@ public:
}
private:
+ /**
+ * @brief Get offset of element in matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
size_t matrix_index_to_byte_offset(uint32_t row, uint32_t col) const
{
return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row});
diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h
index 0d8f2ab81..372bd682d 100644
--- a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h
+++ b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::tensor::View class
+ */
#ifndef __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
#define __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
-#include "util/tensor/Shape.h"
-#include "util/tensor/Index.h"
+#include "misc/tensor/Shape.h"
+#include "misc/tensor/Index.h"
#include <arm_compute/core/ITensor.h>
@@ -29,16 +34,28 @@ namespace arm_compute
namespace tensor
{
+/**
+ * @brief Class to access tensor's element
+ */
template <typename T> class View
{
public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Tensor to support access
+ */
View(::arm_compute::ITensor *tensor) : _tensor{tensor}
{
// DO NOTHING
}
private:
- uint32_t byte_offset_of(const nnfw::util::tensor::Index &index) const
+ /**
+ * @brief Get offset of element in tensor
+ * @param[in] index Index of element
+ * @return Offset of element
+ */
+ uint32_t byte_offset_of(const nnfw::misc::tensor::Index &index) const
{
// NOTE index.rank() >= _tensor->info()->num_dimensions() should hold here
const uint32_t rank = index.rank();
@@ -56,7 +73,12 @@ private:
}
public:
- T at(const nnfw::util::tensor::Index &index) const
+ /**
+ * @brief Get value of element in tensor
+ * @param[in] index Index of element
+ * @return Value of element
+ */
+ T at(const nnfw::misc::tensor::Index &index) const
{
const auto offset = byte_offset_of(index);
@@ -65,7 +87,12 @@ public:
return *ptr;
}
- T &at(const nnfw::util::tensor::Index &index)
+ /**
+ * @brief Get reference of element in tensor
+ * @param[in] index Index of element
+ * @return Reference of element
+ */
+ T &at(const nnfw::misc::tensor::Index &index)
{
const auto offset = byte_offset_of(index);
diff --git a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h
index 502a1ee0e..83ae7c17b 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        FeatureLoggingLayer.h
+ * @brief       This file contains FeatureLoggingLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __FEATURE_LOGGING_LAYER_H__
#define __FEATURE_LOGGING_LAYER_H__
@@ -27,9 +33,24 @@
#include "internal/arm_compute.h"
+/**
+ * @brief Class to run FeatureLogging Layer
+ */
class FeatureLoggingLayer : public ::arm_compute::IFunction
{
public:
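+ /**
+ * @brief Construct a new FeatureLoggingLayer object (empty tag, no target)
+ */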
+ FeatureLoggingLayer(void) : _tag(""), _target(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] tag Text tag for this layer
+ * @param[in] target The feature tensor to be printed
+ * @return N/A
+ */
void configure(const std::string &tag, ::arm_compute::ITensor *target)
{
_tag = tag;
@@ -37,6 +58,10 @@ public:
}
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override
{
if (::internal::arm_compute::isGpuMode())
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc
index 311284efc..28789a801 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc
@@ -17,8 +17,6 @@
#include "GenericFullyConnectedLayer.h"
#include "internal/arm_compute.h"
-#include <arm_compute/core/Helpers.h>
-
void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input,
::arm_compute::ITensor *weights,
::arm_compute::ITensor *biases,
@@ -56,9 +54,9 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input,
{
// reshape
auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape));
- _generic_reshape.configure(CAST_NE(_input), &_neon_buffer);
+ _generic_reshape.configure(_input, &_neon_buffer);
- _neon_fc.configure(&_neon_buffer, CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output));
+ _neon_fc.configure(&_neon_buffer, _weights, _biases, _output);
// NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate
// here.
@@ -66,7 +64,7 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input,
}
else
{
- _neon_fc.configure(CAST_NE(_input), CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output));
+ _neon_fc.configure(_input, _weights, _biases, _output);
}
}
}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h
index 55d8683da..f1519f54d 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h
@@ -14,23 +14,52 @@
* limitations under the License.
*/
+/**
+ * @file        GenericFullyConnectedLayer.h
+ * @brief       This file contains GenericFullyConnectedLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __GENERIC_FULLY_CONNECTED_LAYER_H__
#define __GENERIC_FULLY_CONNECTED_LAYER_H__
-#include <arm_compute/runtime/Tensor.h>
-#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include "internal/layers/GenericReshapeLayer.h"
+/**
+ * @brief Class to run FullyConnected Layer with both CPU and GPU
+ */
class GenericFullyConnectedLayer : public ::arm_compute::IFunction
{
public:
+ GenericFullyConnectedLayer(void)
+ : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{},
+ _neon_buffer{}, _cl_fc{}, _neon_fc{}, _generic_reshape{}, _needs_reshape(false)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] weights The tensor that is filled with weight values
+   * @param[in] biases The tensor that is filled with bias values
+   * @param[in] output The destination tensor
+   * @param[in] needs_reshape Whether the input needs to be reshaped or not
+   * @param[in] reshape The target shape of the reshape. Only valid when needs_reshape is true.
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights,
::arm_compute::ITensor *biases, ::arm_compute::ITensor *output, bool needs_reshape,
::arm_compute::TensorShape reshape);
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override;
private:
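
A usage sketch for the class above; tensor setup is elided and `input`, `weights`, `biases`, `output`, `input_size`, and `batch_size` are illustrative:

  GenericFullyConnectedLayer fc;
  // Collapse a 4-D feature map to 2-D before the FC step by requesting a reshape.
  fc.configure(input, weights, biases, output, /*needs_reshape=*/true,
               ::arm_compute::TensorShape{input_size, batch_size});
  fc.run(); // dispatches to the CL or NEON implementation depending on isGpuMode()
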
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc
index 2cdfe1b6e..c38c2e9e3 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc
@@ -43,8 +43,8 @@ void GenericReshapeLayer::configure(::arm_compute::ITensor *input, ::arm_compute
}
else
{
- _neon_permute.configure(CAST_NE(input), &_neon_permuted, pv);
- _neon_reshape.configure(&_neon_permuted, CAST_NE(output));
+ _neon_permute.configure(input, &_neon_permuted, pv);
+ _neon_reshape.configure(&_neon_permuted, output);
// NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here.
_neon_permuted.allocator()->allocate();
diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h
index 1def21085..a22c14c8b 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        GenericReshapeLayer.h
+ * @brief       This file contains GenericReshapeLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __GENERIC_RESHAPE_LAYER_H__
#define __GENERIC_RESHAPE_LAYER_H__
@@ -25,12 +31,33 @@
#include <arm_compute/runtime/NEON/functions/NEPermute.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+/**
+ * @brief Class to run Reshape Layer with both CPU and GPU
+ */
class GenericReshapeLayer : public ::arm_compute::IFunction
{
public:
+ GenericReshapeLayer(void)
+ : _input(nullptr), _output(nullptr), _cl_permuted{}, _neon_permuted{}, _cl_permute{},
+ _cl_reshape{}, _neon_permute{}, _neon_reshape{}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] input The source tensor
+ * @param[in] output The destination tensor
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output);
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc
deleted file mode 100644
index 4a5370587..000000000
--- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include "PadLayer.h"
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-void PadLayer::configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width)
-{
- _input = input;
- _output = output;
- _border_width = border_width;
- _output_height = _output->info()->dimension(0);
- _output_width = _output->info()->dimension(1);
-
- uint8_t constant_border_value = 0;
- ::arm_compute::PixelValue constant_pixel_value = ::arm_compute::PixelValue(constant_border_value);
-
- unsigned int padding_size = _border_width;
- input->info()->extend_padding(::arm_compute::PaddingSize{padding_size});
- _fillborderkernel.configure(input, _border_width, ::arm_compute::BorderMode::CONSTANT,
- constant_pixel_value);
-}
-
-void PadLayer::run(void)
-{
- _fillborderkernel.run();
-
- ::arm_compute::Coordinates coordinates =
- ::arm_compute::Coordinates(-_border_width, -_border_width);
- ::arm_compute::TensorShape new_tensor_shape =
- ::arm_compute::TensorShape(_output_height, _output_width);
-
- /* NOTE: The cl kernel fills the data in the borders(not in the tensor).
- Once the tensor is received back at NNAPI, we are adjusting
- the valid region in such a way that the padding becomes part of the tensor itself
- and matches the size of output. */
- _input->info()->set_valid_region(::arm_compute::ValidRegion(coordinates, new_tensor_shape));
-
- /* NOTE: Since cl kernel does not have an argument for output tensor while NNAPI does.
- We need to map the input (tensor that is passed to the cl kernel) back to
- output. */
-
- // TODO: Write a modified CLCopy kernel to do this job.
- populateOutput();
-}
-
-void PadLayer::populateOutput()
-{
- auto &queue = ::arm_compute::CLScheduler::get().queue();
- _input->map(queue);
- _output->map(queue);
-
- auto input_tensor = static_cast<::arm_compute::ITensor *>(_input);
- auto const source_data = input_tensor->buffer();
-
- auto output_tensor = static_cast<::arm_compute::ITensor *>(_output);
- auto dst_data = output_tensor->buffer();
-
- memmove(dst_data, source_data, _output_height * _output_width * 4);
-
- _input->unmap(queue);
- _output->unmap(queue);
-}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc
new file mode 100644
index 000000000..6d348e814
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleArgMinMax.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleArgMinMax::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ std::vector<uint32_t> axis, ::arm_compute::ArgOperation op)
+{
+ _input = input;
+ _output = output;
+ _axis = axis;
+ _input_rank = input->info()->num_dimensions();
+ _op_type = op;
+}
+
+inline const ::arm_compute::TensorShape
+inferOutputShape(const ::arm_compute::TensorShape &input_shape, const std::vector<uint32_t> &axis,
+ int input_rank)
+{
+ ::arm_compute::TensorShape out_shape{};
+ size_t dim = 1;
+ for (int i = 0; i < input_rank; ++i)
+ {
+ dim = input_shape[i];
+ out_shape.set(i, dim);
+ }
+
+  for (size_t i = 0; i < axis.size(); ++i)
+ {
+ out_shape.set(axis[i], 1);
+ }
+
+ return out_shape;
+}
+
+template <typename T>
+inline T getArgMinMaxEle(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::TensorShape &output_shape, const size_t b,
+ const size_t d, const size_t h, const size_t w, const int axis,
+ const ::arm_compute::ArgOperation op_type)
+{
+  // If output[dimension] == 1, that dimension is being reduced, so every value along
+  // it is scanned. Otherwise only the single value at the given coordinate is checked.
+ const size_t start_b = output_shape[3] == 1 ? 0 : b;
+ const size_t start_d = output_shape[2] == 1 ? 0 : d;
+ const size_t start_h = output_shape[1] == 1 ? 0 : h;
+ const size_t start_w = output_shape[0] == 1 ? 0 : w;
+ const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b;
+ const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d;
+ const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h;
+ const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w;
+
+ ::arm_compute::Coordinates id{w, h, d, b};
+ ::arm_compute::Coordinates min_max_id{w, h, d, b};
+
+ T value = *reinterpret_cast<T *>(input->ptr_to_element(id));
+ T tval = *reinterpret_cast<T *>(input->ptr_to_element(id));
+
+ for (size_t in_b = start_b; in_b <= stop_b; ++in_b)
+ {
+ id.set(3, in_b);
+ for (size_t in_d = start_d; in_d <= stop_d; ++in_d)
+ {
+ id.set(2, in_d);
+ for (size_t in_h = start_h; in_h <= stop_h; ++in_h)
+ {
+ id.set(1, in_h);
+ for (size_t in_w = start_w; in_w <= stop_w; ++in_w)
+ {
+ id.set(0, in_w);
+ if (op_type == ::arm_compute::ArgOperation::MIN)
+ {
+ value = std::min<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
+ }
+ else if (op_type == ::arm_compute::ArgOperation::MAX)
+ {
+ value = std::max<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
+ }
+ else
+ throw std::runtime_error("This Arg operation is not supported, yet");
+
+ if (tval != value)
+ {
+ min_max_id = id;
+ tval = value;
+ }
+ }
+ }
+ }
+ }
+
+ return min_max_id[axis];
+}
+
+template <typename T>
+inline void
+getArgMinMax(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::TensorShape &output_shape, ::arm_compute::ITensor *output,
+ const int axis, const ::arm_compute::ArgOperation op_type)
+{
+ ::arm_compute::Coordinates id;
+ for (size_t out_b = 0; out_b < output_shape[3]; ++out_b)
+ {
+ id.set(3, out_b);
+ for (size_t out_d = 0; out_d < output_shape[2]; ++out_d)
+ {
+ id.set(2, out_d);
+ for (size_t out_h = 0; out_h < output_shape[1]; ++out_h)
+ {
+ id.set(1, out_h);
+ for (size_t out_w = 0; out_w < output_shape[0]; ++out_w)
+ {
+ id.set(0, out_w);
+ *reinterpret_cast<int *>(output->ptr_to_element(id)) = getArgMinMaxEle<T>(
+ input, input_shape, output_shape, out_b, out_d, out_h, out_w, axis, op_type);
+ }
+ }
+ }
+ }
+}
+
+void SimpleArgMinMax::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ ::arm_compute::TensorShape input_shape = _input->info()->tensor_shape();
+
+  // Only a single reduction axis is supported for now.
+  // TODO Support axis size > 1.
+ int axis_val = _axis[0];
+ ::arm_compute::TensorShape output_shape = inferOutputShape(input_shape, _axis, _input_rank);
+
+ _output->info()->set_tensor_shape(output_shape);
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::QASYMM8:
+ getArgMinMax<uint8_t>(_input, input_shape, output_shape, _output, axis_val, _op_type);
+ break;
+ case ::arm_compute::DataType::S32:
+ getArgMinMax<int32_t>(_input, input_shape, output_shape, _output, axis_val, _op_type);
+ break;
+ case ::arm_compute::DataType::F32:
+ getArgMinMax<float>(_input, input_shape, output_shape, _output, axis_val, _op_type);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
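
A worked example of the semantics implemented above, with shapes in ACL order and illustrative tensors:

  // input shape (W,H,D,B) = (4,1,1,1) with data {3, 9, 1, 9}, axis = {0}, op = MAX:
  // inferOutputShape() yields (1,1,1,1); getArgMinMaxEle() scans W and keeps the
  // index of the first maximal element, so the single output value is 1.
  SimpleArgMinMax argminmax;
  argminmax.configure(input, output, std::vector<uint32_t>{0},
                      ::arm_compute::ArgOperation::MAX);
  argminmax.run();
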
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h
new file mode 100644
index 000000000..b90e74579
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_ARG_MIN_MAX_H__
+#define __SIMPLE_ARG_MIN_MAX_H__
+
+#include "internal/arm_compute.h"
+#include "arm_compute/core/TypesEx.h"
+
+class SimpleArgMinMax : public ::arm_compute::IFunction
+{
+public:
+  SimpleArgMinMax(void)
+      : _input(nullptr), _output(nullptr), _axis(), _input_rank(0),
+        _op_type(::arm_compute::ArgOperation::MAX)
+ {
+ // DO NOTHING
+ }
+
+public:
+  /** Initialise input and output
+   *
+   * @param[in] input First tensor input.
+   * @param[out] output Output tensor.
+   * @param[in] axis Dimension along which to find Min or Max Index.
+   * @param[in] op Arg operation to perform (MIN or MAX).
+   */
+  void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+                 std::vector<uint32_t> axis, ::arm_compute::ArgOperation op);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ std::vector<uint32_t> _axis;
+ int _input_rank;
+ ::arm_compute::ArgOperation _op_type;
+};
+
+#endif /*__SIMPLE_ARG_MIN_MAX_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h
index 31c927b4f..aed9ae286 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h
@@ -14,15 +14,36 @@
* limitations under the License.
*/
+/**
+ * @file        SimpleArithmeticAddition.h
+ * @brief       This file contains SimpleArithmeticAddition class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __SIMPLE_ARITHMETIC_ADDITION_H__
#define __SIMPLE_ARITHMETIC_ADDITION_H__
#include "internal/arm_compute.h"
#include <arm_compute/core/ITensor.h>
+/**
+ * @brief Class to run SimpleArithmeticAddition Layer
+ */
class SimpleArithmeticAddition : public ::arm_compute::IFunction
{
public:
+ SimpleArithmeticAddition(void) : _lhs(nullptr), _rhs(nullptr), _out(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @brief Configure the layer
+   * @param[in] lhs Left-hand side operand
+   * @param[in] rhs Right-hand side operand
+   * @param[in] out The destination tensor (result operand)
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *lhs, ::arm_compute::ITensor *rhs,
::arm_compute::ITensor *out)
{
@@ -32,6 +53,10 @@ public:
}
public:
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run(void) override
{
if (::internal::arm_compute::isGpuMode())
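
A usage sketch, with `lhs`, `rhs`, and `sum` as illustrative, already-allocated tensors of identical shape and type:

  SimpleArithmeticAddition add;
  add.configure(lhs, rhs, sum);
  add.run(); // element-wise addition on the host, mapping CL buffers when needed
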
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
new file mode 100644
index 000000000..87175ee1a
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleBatchToSpaceNd.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleBatchToSpaceND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ const int32_t *block_size,
+ const ::arm_compute::Coordinates &axises)
+{
+ const auto rank = axises.num_dimensions();
+ assert(rank == 4);
+
+ for (int i = 0; i < rank; ++i)
+ assert(axises[i] >= 0 && axises[i] < rank);
+
+ _input = input;
+ _output = output;
+ _block_size = block_size;
+ _axises = axises;
+}
+
+template <typename T>
+inline void BatchToSpaceND(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape,
+ const int32_t *block_size_data, ::arm_compute::ITensor *output,
+ const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises)
+{
+ const int output_batch = output_shape[axises[0]];
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int depth = output_shape[axises[3]];
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < depth; ++out_d)
+ {
+ const int in_d = out_d;
+ const int in_h = out_h / block_size_data[0];
+ const int in_w = out_w / block_size_data[1];
+ const int in_b =
+ out_b +
+ ((out_h % block_size_data[0]) * block_size_data[1] + out_w % block_size_data[1]) *
+ output_batch;
+
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+}
+void SimpleBatchToSpaceND::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ BatchToSpaceND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::F32:
+ BatchToSpaceND<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
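
A sketch of the coordinate mapping above; the block-size buffer must outlive run(), and the tensors are illustrative:

  static const int32_t block_size[2] = {2, 2};
  SimpleBatchToSpaceND b2s;
  b2s.configure(input, output, block_size); // default axises = getARMComputeAxises(4)
  b2s.run();
  // Index math example with output_batch = 1: output (b=0, h=1, w=0, d=0) reads
  // in_b = 0 + ((1 % 2) * 2 + 0 % 2) * 1 = 2, i.e. input batch 2 — the
  // batch-to-space rearrangement the loop implements.
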
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h
new file mode 100644
index 000000000..5695d9719
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_BATCH_TO_SPACE_ND_H__
+#define __SIMPLE_BATCH_TO_SPACE_ND_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleBatchToSpaceND : public ::arm_compute::IFunction
+{
+public:
+ SimpleBatchToSpaceND(void) : _input(nullptr), _output(nullptr), _block_size(nullptr), _axises{}
+ {
+ // DO NOTHING
+ }
+
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[out] output Output tensor.
+ * @param[in] block_size Block size.
+   * @param[in] axises     Axes of rank 4
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ const int32_t *block_size,
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ const int32_t *_block_size;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__SIMPLE_BATCH_TO_SPACE_ND_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc
new file mode 100644
index 000000000..7c7706a78
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/layers/SimpleCastLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleCastLayer::castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out,
+ const arm_compute::Coordinates &id)
+{
+ switch (in->info()->data_type())
+ {
+ case ::arm_compute::DataType::F32:
+ {
+ copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id);
+ break;
+ }
+ case ::arm_compute::DataType::S32:
+ {
+ copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id);
+ break;
+ }
+ case ::arm_compute::DataType::U32:
+ {
+ copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id);
+ break;
+ }
+ case ::arm_compute::DataType::QASYMM8:
+ {
+ const uint8_t quantizedValue = *(in->ptr_to_element(id));
+ copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id);
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+void SimpleCastLayer::configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out)
+{
+ _in = in;
+ _out = out;
+}
+
+void SimpleCastLayer::run(void)
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+ CAST_CL(_in)->map(q);
+ CAST_CL(_out)->map(q);
+ }
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_out->info()->tensor_shape());
+
+ execute_window_loop(window,
+ [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); });
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+ CAST_CL(_out)->unmap(q);
+ CAST_CL(_in)->unmap(q);
+ }
+}
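
A usage sketch; `s32_tensor` and `f32_tensor` are illustrative, already-allocated tensors of identical shape:

  SimpleCastLayer cast;
  cast.configure(s32_tensor, f32_tensor); // S32 -> F32, element by element
  cast.run(); // iterates the output window and calls castData() per coordinate
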
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h
index fa3006438..f9a48b481 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h
@@ -14,80 +14,55 @@
* limitations under the License.
*/
+/**
+ * @file        SimpleCastLayer.h
+ * @brief       This file contains SimpleCastLayer class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __SIMPLE_CAST_LAYER_H__
#define __SIMPLE_CAST_LAYER_H__
-#include <arm_compute/core/ITensor.h>
-
#include "internal/arm_compute.h"
-#include "internal/op/Cast.h"
+#include "internal/arm_compute/Cast.h"
+/**
+ * @brief Class to run SimpleCast Layer
+ */
class SimpleCastLayer : public ::arm_compute::IFunction
{
public:
- void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out)
+ SimpleCastLayer(void) : _in(nullptr), _out(nullptr)
{
- _in = in;
- _out = out;
+ // DO NOTHING
}
-public:
- void run(void) override
- {
- if (::internal::arm_compute::isGpuMode())
- {
- auto &q = ::arm_compute::CLScheduler::get().queue();
- CAST_CL(_in)->map(q);
- CAST_CL(_out)->map(q);
- }
-
- arm_compute::Window window;
- window.use_tensor_dimensions(_out->info()->tensor_shape());
+ /**
+ * @brief Configure the layer
+ * @param[in] in The source tensor
+ * @param[in] out The destination tensor
+ * @return N/A
+ */
+ void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out);
- execute_window_loop(window,
- [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); });
-
- if (::internal::arm_compute::isGpuMode())
- {
- auto &q = ::arm_compute::CLScheduler::get().queue();
- CAST_CL(_out)->unmap(q);
- CAST_CL(_in)->unmap(q);
- }
- }
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
+ void run(void) override;
+private:
+ /**
+ * @brief Cast and copy data from one tensor to another
+ *
+ * @param[in] in The source tensor
+ * @param[out] out The destination tensor
+ * @param[in] id Coordinates to copy
+ * @return N/A
+ */
void castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out,
- const arm_compute::Coordinates &id)
- {
- switch (in->info()->data_type())
- {
- case ::arm_compute::DataType::F32:
- {
- copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id);
- break;
- }
- case ::arm_compute::DataType::S32:
- {
- copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id);
- break;
- }
- case ::arm_compute::DataType::U32:
- {
- copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id);
- break;
- }
- case ::arm_compute::DataType::QASYMM8:
- {
- const uint8_t quantizedValue = *(in->ptr_to_element(id));
- copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id);
- break;
- }
- default:
- throw std::runtime_error("Not supported, yet");
- break;
- }
- }
+ const arm_compute::Coordinates &id);
-private:
::arm_compute::ITensor *_in;
::arm_compute::ITensor *_out;
};
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
new file mode 100644
index 000000000..d62a8321b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleDepthToSpace.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ int32_t block_size, const ::arm_compute::Coordinates &axises)
+{
+ const auto rank = axises.num_dimensions();
+ assert(rank == 4);
+ for (int i = 0; i < rank; ++i)
+ {
+ assert(axises[i] >= 0);
+ assert(axises[i] < rank);
+ }
+
+ _input = input;
+ _output = output;
+ _block_size = block_size;
+ _axises = axises;
+}
+
+template <typename T>
+inline void DepthToSpace(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+ ::arm_compute::ITensor *output,
+ const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises)
+{
+ const int output_batch = output_shape[axises[0]];
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int output_depth = output_shape[axises[3]];
+
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ const int in_b = out_b;
+ const int in_h = out_h / block_size;
+ const int in_w = out_w / block_size;
+ const int in_d =
+ out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth;
+
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+}
+
+void SimpleDepthToSpace::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ case ::arm_compute::DataType::F32:
+ DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
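
A usage sketch plus the index math of the loop above, for block_size = 2 and output_depth = 1; tensors are illustrative:

  SimpleDepthToSpace d2s;
  d2s.configure(input, output, /*block_size=*/2);
  d2s.run();
  // Example: output (h=1, w=1, d=0) reads in_h = 0, in_w = 0 and
  // in_d = 0 + ((1 % 2) * 2 + 1 % 2) * 1 = 3 — the four input channels of each
  // pixel fan out into one 2x2 spatial block.
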
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
new file mode 100644
index 000000000..1032aaa47
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_DEPTH_TO_SPACE_H__
+#define __SIMPLE_DEPTH_TO_SPACE_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleDepthToSpace : public ::arm_compute::IFunction
+{
+public:
+ SimpleDepthToSpace(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[out] output Output tensor.
+ * @param[in] block_size Block size.
+   * @param[in] axises     Axes of rank 4
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ int32_t _block_size;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
index 089c783c1..ae740bb10 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#include "internal/layers/SimpleEmbeddingLookup.h"
#include <arm_compute/runtime/CL/CLScheduler.h>
@@ -6,7 +21,8 @@ void SimpleEmbeddingLookup::configure(::arm_compute::ITensor *lookups,
::arm_compute::ITensor *values,
::arm_compute::ITensor *output)
{
- // Assume that verification of operands are already done at Planner::visit()
+ assert(values->info()->num_dimensions() == output->info()->num_dimensions());
+ assert(values->info()->num_dimensions() > 1 && values->info()->num_dimensions() <= 4);
_lookups = lookups;
_values = values;
_output = output;
@@ -25,85 +41,62 @@ void SimpleEmbeddingLookup::run()
// type of elements of lookups is always integer
const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
- const auto values_buf = _values->buffer();
- auto output_buf = _output->buffer();
const auto lookups_info = _lookups->info();
const auto values_info = _values->info();
const auto output_info = _output->info();
- // TODO Refactor below duplicated code!
- const auto values_rank = values_info->num_dimensions();
- switch (values_rank)
+ // NOTE The first dimension's position is always at the end of dimensions.
+ const auto first_dim_pos = values_info->num_dimensions() - 1;
+
+ const size_t first_dim = values_info->dimension(first_dim_pos);
+ for (size_t i = 0; i < lookups_info->dimension(0); ++i)
{
- case 2:
- // (H,W) in nnapi -> (W,H) in acl
- {
- const size_t row_size = values_info->dimension(1);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 3:
- // (B,H,W) in nnapi -> (W,H,B) in acl
- {
- const size_t row_size = values_info->dimension(2);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 4:
- // (N,H,W,C) in nnapi -> (N,C,H,W) in acl
- {
- const size_t row_size = values_info->dimension(3);
- const size_t row_bytes = values_info->total_size() / row_size;
- for (size_t i = 0; i < lookups_info->dimension(0); ++i)
- {
- if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size)
- throw std::runtime_error("Embedding Lookup: index out of bounds.");
-
- size_t idx = lookups_buf[i];
- size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, 0, idx});
- size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, 0, i});
-
- unsigned char *sink_addr = output_buf + row_offset_by_i;
- unsigned char *source_addr = values_buf + row_offset_by_idx;
- memcpy(sink_addr, source_addr, row_bytes);
- }
- }
- break;
- case 1:
- // In this case, shape of values actually is matrix but the height(row size) is 1 in acl. If
- // row size is 1, this op is not needed and it means this situtation could be wrong.
- throw std::runtime_error("Wrong usage of EmbeddingLookup op!");
- default:
- throw std::runtime_error("Not supported rank!");
+ if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim)
+ throw std::runtime_error("Embedding Lookup: index out of bounds.");
}
+  // If the strides of values and output differ, the padding applied to the two
+  // tensors is different, so the data cannot be copied in a single pass.
+ auto can_copy_at_once = [&]() -> bool {
+ const auto &values_strides = values_info->strides_in_bytes();
+ const auto &output_strides = output_info->strides_in_bytes();
+
+ for (size_t i = 0; i < first_dim_pos; ++i)
+ {
+      if (values_strides[i] != output_strides[i])
+ return false;
+ }
+
+ return true;
+ };
+
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ size_t copy_bytes;
+ Window window;
+ if (can_copy_at_once())
+ {
+ copy_bytes = values_info->total_size() / first_dim;
+ window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+ }
+ else
+ {
+ copy_bytes = values_info->dimension(0) * values_info->element_size();
+ window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+ }
+
+ Iterator it(_output, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ ::arm_compute::Coordinates values_id = id;
+ const int idx = id[first_dim_pos];
+ values_id.set(first_dim_pos, lookups_buf[idx]);
+ memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+ },
+ it);
+
if (::internal::arm_compute::isGpuMode())
{
auto &q = ::arm_compute::CLScheduler::get().queue();
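
Net semantics of the rewritten loop, as a sketch with illustrative shapes: values of shape [vocab, dim] and lookups {4, 0, 2} produce an output of shape [3, dim] whose row i is a memcpy of values row lookups[i]; out-of-range indices throw before any copying starts:

  SimpleEmbeddingLookup lookup;
  lookup.configure(lookups, values, output);
  lookup.run();
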
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
index 9f2cd977f..fd499437f 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
@@ -1,16 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#ifndef __SIMPLE_EMBEDDING_LOOKUP_H__
#define __SIMPLE_EMBEDDING_LOOKUP_H__
#include "internal/arm_compute.h"
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/runtime/IFunction.h>
+/**
+ * @file        SimpleEmbeddingLookup.h
+ * @brief       This file contains SimpleEmbeddingLookup class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+/**
+ * @brief Class to run SimpleEmbeddingLookup Layer
+ */
class SimpleEmbeddingLookup : public ::arm_compute::IFunction
{
public:
+ SimpleEmbeddingLookup(void) : _lookups(nullptr), _values(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Configure the layer
+ * @param[in] lookups 1D tensor which contains lookup values
+ * @param[in] values The source tensor
+ * @param[in] output The destination tensor
+ * @return N/A
+ */
void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values,
::arm_compute::ITensor *output);
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run() override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc
new file mode 100644
index 000000000..7f8ae2505
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleHashtableLookupLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleHashtableLookupLayer::configure(::arm_compute::ITensor *lookups,
+ ::arm_compute::ITensor *keys,
+ ::arm_compute::ITensor *values,
+ ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *hits)
+{
+ _lookups = lookups;
+ _keys = keys;
+ _values = values;
+ _output = output;
+ _hits = hits;
+ _lookup_indices.resize(lookups->info()->dimension(0), -1);
+}
+
+void SimpleHashtableLookupLayer::run()
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_lookups)->map(queue);
+ CAST_CL(_keys)->map(queue);
+ CAST_CL(_values)->map(queue);
+ CAST_CL(_output)->map(queue);
+ CAST_CL(_hits)->map(queue);
+ }
+
+ const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer());
+ const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer());
+ uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
+
+ const auto lookups_info = _lookups->info();
+ const auto values_info = _values->info();
+ const auto keys_info = _keys->info();
+ const auto output_info = _output->info();
+
+  // NOTE The first dimension's position must always be at the end of the dimensions.
+ const auto first_dim_pos = values_info->num_dimensions() - 1;
+ const size_t first_dim = values_info->dimension(first_dim_pos);
+
+ std::map<int32_t, size_t> key_map;
+  const size_t keys_num = keys_info->dimension(0);
+ for (size_t key_index = 0; key_index < keys_num; key_index++)
+ {
+ key_map[keys_buf[key_index]] = key_index;
+ }
+
+  const size_t lookups_num = lookups_info->dimension(0);
+ for (size_t i = 0; i < lookups_num; ++i)
+ {
+ const auto lookup_value = lookups_buf[i];
+ const auto it = key_map.find(lookup_value);
+ if (it != key_map.end())
+ {
+ if (it->second >= first_dim)
+ throw std::runtime_error("HashTable Lookup: index out of bounds.");
+ _lookup_indices[i] = it->second;
+ }
+ }
+
+  // If the strides of values and output differ, the padding applied to the two
+  // tensors is different, so the data cannot be copied in a single pass.
+ auto can_copy_at_once = [&]() -> bool {
+ const auto &values_strides = values_info->strides_in_bytes();
+ const auto &output_strides = output_info->strides_in_bytes();
+
+ for (size_t i = 0; i < first_dim_pos; ++i)
+ {
+      if (values_strides[i] != output_strides[i])
+ return false;
+ }
+
+ return true;
+ };
+
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Coordinates;
+
+ size_t copy_bytes;
+ Window window;
+ if (can_copy_at_once())
+ {
+ copy_bytes = values_info->total_size() / first_dim;
+ window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+ }
+ else
+ {
+ copy_bytes = values_info->dimension(0) * values_info->element_size();
+ window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+ }
+
+ Iterator it(_output, window);
+ execute_window_loop(window,
+ [&](const Coordinates &id) {
+ Coordinates values_id = id;
+ const int idx = id[first_dim_pos];
+ const int lookup_index = _lookup_indices[idx];
+                        if (lookup_index >= 0)
+                        {
+                          values_id.set(first_dim_pos, lookup_index);
+                          memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                          hits_buf[idx] = 1;
+                        }
+                        else
+                        {
+                          memset(it.ptr(), 0, copy_bytes);
+                          hits_buf[idx] = 0;
+                        }
+ },
+ it);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_lookups)->unmap(queue);
+ CAST_CL(_keys)->unmap(queue);
+ CAST_CL(_values)->unmap(queue);
+ CAST_CL(_output)->unmap(queue);
+ CAST_CL(_hits)->unmap(queue);
+ }
+}
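
A sketch of the lookup semantics above, with illustrative values:

  // keys    = {10, 20, 30}   -> key_map maps each key to its row index in values
  // lookups = {20, 99}
  // output row 0 = values row 1 and hits[0] = 1 (key 20 found)
  // output row 1 = zero-filled  and hits[1] = 0 (key 99 missing)
  SimpleHashtableLookupLayer htl;
  htl.configure(lookups, keys, values, output, hits);
  htl.run();
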
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
new file mode 100644
index 000000000..ba9d2ec0d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_HASHTABLE_LOOKUP_H__
+#define __SIMPLE_HASHTABLE_LOOKUP_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleHashtableLookupLayer : public ::arm_compute::IFunction
+{
+public:
+ SimpleHashtableLookupLayer(void)
+ : _lookups(nullptr), _keys(nullptr), _values(nullptr), _output(nullptr), _hits(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys,
+ ::arm_compute::ITensor *values, ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *hits);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_lookups;
+ ::arm_compute::ITensor *_keys;
+ ::arm_compute::ITensor *_values;
+ ::arm_compute::ITensor *_output;
+ ::arm_compute::ITensor *_hits;
+ std::vector<int32_t> _lookup_indices;
+};
+
+#endif /*__SIMPLE_HASHTABLE_LOOKUP_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc
new file mode 100644
index 000000000..d3943ad40
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleNeg.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleNeg::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void SimpleNeg::run()
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->map(queue);
+ CAST_CL(_output)->map(queue);
+ }
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape());
+
+ execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+    // NOTE The input and output tensors must have the same data type.
+ assert(_input->info()->data_type() == _output->info()->data_type());
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::F32:
+ {
+ const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id));
+ *reinterpret_cast<float *>(_output->ptr_to_element(id)) = -input_value;
+ break;
+ }
+ case ::arm_compute::DataType::S32:
+ {
+ const auto input_value = *reinterpret_cast<int32_t *>(_input->ptr_to_element(id));
+ *reinterpret_cast<int32_t *>(_output->ptr_to_element(id)) = -input_value;
+ break;
+ }
+ case ::arm_compute::DataType::U32:
+ {
+ const auto input_value = *reinterpret_cast<uint32_t *>(_input->ptr_to_element(id));
+ *reinterpret_cast<uint32_t *>(_output->ptr_to_element(id)) = -input_value;
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+ });
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->unmap(queue);
+ CAST_CL(_output)->unmap(queue);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h
index cb3f36337..4ca88e7f8 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h
@@ -1,41 +1,39 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __PAD_LAYER_H__
-#define __PAD_LAYER_H__
-
-#include <arm_compute/runtime/CL/CLTensor.h>
-#include <arm_compute/runtime/CL/functions/CLFillBorder.h>
-
-class PadLayer : public ::arm_compute::IFunction
-{
-public:
- void configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width);
- void run(void) override;
-
-private:
- ::arm_compute::ICLTensor *_input;
- ::arm_compute::ICLTensor *_output;
- int _border_width;
- int _output_height;
- int _output_width;
-
- ::arm_compute::CLFillBorder _fillborderkernel;
- void populateOutput();
-};
-
-#endif // __PAD_LAYER_H__
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_NEG_H__
+#define __SIMPLE_NEG_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleNeg : public ::arm_compute::IFunction
+{
+public:
+ SimpleNeg(void) : _input(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+};
+
+#endif /*__SIMPLE_NEG_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc
new file mode 100644
index 000000000..2a0a25f0c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/arm_compute.h"
+#include "SimplePackLayer.h"
+
+void SimplePackLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_vector,
+ ::arm_compute::ICLTensor *output, int32_t axis)
+{
+ uint32_t nr_inputs = input_vector.size();
+ uint32_t output_rank = output->info()->num_dimensions();
+ const ::arm_compute::PermutationVector pv{1, 2, 0};
+ _cl_permuted_vector.resize(nr_inputs);
+ _cl_permute_vector.resize(nr_inputs);
+
+ _output = output;
+ // A negative axis implies axis from the end.
+ // For example, axis = -1 implies the first axis from the end, i.e. axis = Rank - 1.
+  // Similarly, axis = -2 implies the second axis from the end, i.e. axis = Rank - 2.
+ if (axis < 0)
+ {
+ axis += output_rank;
+ }
+ _axis = ToARMComputeAxis(output_rank, axis).value();
+ _cl_reshape_vector.resize(nr_inputs);
+
+ ::arm_compute::TensorShape subTensor_shape{};
+ for (int i = 0; i < output_rank; i++)
+ {
+ if (i != _axis)
+ {
+ subTensor_shape.set(i, _output->info()->tensor_shape()[i]);
+ }
+ else
+ {
+ subTensor_shape.set(i, 1);
+ }
+ }
+
+ auto subTensor_offset = ::arm_compute::Coordinates{};
+ subTensor_offset.set_num_dimensions(output_rank);
+
+ for (int i = 0; i < input_vector.size(); i++)
+ {
+ _input_vector.push_back(input_vector[i]);
+ subTensor_offset[_axis] = i;
+ auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>(
+ CAST_CL(_output), subTensor_shape, subTensor_offset, true);
+ _sub_tensor_vector.push_back(temp_tensor);
+    // Permute and reshape each input into its sub-tensor slice of the output at the
+    // current offset; dimension expansion happens automatically.
+ _cl_permute_vector[i].configure(CAST_CL(_input_vector[i]), &_cl_permuted_vector[i], pv);
+ _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], _sub_tensor_vector[i].get());
+ _cl_permuted_vector[i].allocator()->allocate();
+ }
+}
+
+void SimplePackLayer::run(void)
+{
+ for (int i = 0; i < _input_vector.size(); i++)
+ {
+ _cl_permute_vector[i].run();
+ _cl_reshape_vector[i].run();
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h
new file mode 100644
index 000000000..2c2fc37f2
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __SIMPLE_PACK_LAYER_H__
+#define __SIMPLE_PACK_LAYER_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+class SimplePackLayer : public ::arm_compute::IFunction
+{
+public:
+ SimplePackLayer(void)
+ : _cl_permuted_vector{}, _input_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{},
+ _cl_permute_vector{}, _output(nullptr), _axis(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+  void configure(const std::vector<::arm_compute::ICLTensor *> &input_vector,
+                 ::arm_compute::ICLTensor *output, int32_t axis);
+
+public:
+ void run(void) override;
+
+private:
+ std::vector<::arm_compute::CLTensor> _cl_permuted_vector;
+ std::vector<::arm_compute::ICLTensor *> _input_vector;
+ std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector;
+ std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector;
+ std::vector<::arm_compute::CLPermute> _cl_permute_vector;
+ ::arm_compute::ICLTensor *_output;
+  int32_t _axis;
+};
+
+#endif // __SIMPLE_PACK_LAYER_H__
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc
new file mode 100644
index 000000000..64236603f
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimplePadLayer.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace
+{
+bool validate_arg(const ::arm_compute::ITensor *input, const ::arm_compute::ITensor *output,
+ const ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::Coordinates &axises)
+{
+ const int input_batch = input->info()->tensor_shape()[axises[0]];
+ const int input_height = input->info()->tensor_shape()[axises[1]];
+ const int input_width = input->info()->tensor_shape()[axises[2]];
+ const int input_depth = input->info()->tensor_shape()[axises[3]];
+
+ const int output_batch = output->info()->tensor_shape()[axises[0]];
+ const int output_height = output->info()->tensor_shape()[axises[1]];
+ const int output_width = output->info()->tensor_shape()[axises[2]];
+ const int output_depth = output->info()->tensor_shape()[axises[3]];
+
+ auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0}));
+ auto pad_batch_down = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 0}));
+ auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1}));
+ auto pad_height_bottom = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 1}));
+ auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2}));
+ auto pad_width_right = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 2}));
+ auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3}));
+ auto pad_depth_back = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 3}));
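+  // i.e. padding_size is laid out as a 4x2 tensor: element {j, i} holds the
+  // before (j == 0) / after (j == 1) padding amount of the i-th axis, in
+  // (batch, height, width, depth) order.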
+
+ const int padded_batch = input_batch + pad_batch_up + pad_batch_down;
+ const int padded_height = input_height + pad_height_top + pad_height_bottom;
+ const int padded_width = input_width + pad_width_left + pad_width_right;
+ const int padded_depth = input_depth + pad_depth_front + pad_depth_back;
+
+ return (padded_batch == output_batch) && (padded_height == output_height) &&
+ (padded_width == output_width) && (padded_depth == output_depth);
+}
+} // namespace
+
+void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::Coordinates &axises)
+{
+
+ const auto rank = axises.num_dimensions();
+ assert(rank == 4);
+ assert(input != nullptr && output != nullptr && padding_size != nullptr);
+
+ for (int i = 0; i < rank; ++i)
+ {
+ assert(axises[i] >= 0);
+ assert(axises[i] < rank);
+ }
+
+ _input = input;
+ _output = output;
+ _padding_size = padding_size;
+ _axises = axises;
+}
+
+template <typename T>
+inline void ApplyPadding(const ::arm_compute::ITensor *input_data,
+ const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::ITensor *padding_size,
+ ::arm_compute::ITensor *output_data,
+ const ::arm_compute::TensorShape &output_shape,
+ const ::arm_compute::Coordinates &axises, T zero_value)
+{
+
+  assert(validate_arg(input_data, output_data, padding_size, axises) &&
+         "Padded input shape does not match the output shape");
+
+ const int input_batch = input_shape[axises[0]];
+ const int input_height = input_shape[axises[1]];
+ const int input_width = input_shape[axises[2]];
+ const int input_depth = input_shape[axises[3]];
+
+ const int output_batch = output_shape[axises[0]];
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int output_depth = output_shape[axises[3]];
+
+  // Only the leading padding sizes (up, top, left, and front) are needed here.
+ auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0}));
+ auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1}));
+ auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2}));
+ auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3}));
+
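+  // Output coordinates outside the original input region are filled with
+  // zero_value; for QASYMM8 tensors run() passes the quantization offset so
+  // that padded cells encode a real 0.0f.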
+ for (int out_b = 0; out_b < output_batch; ++out_b)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ for (int out_d = 0; out_d < output_depth; ++out_d)
+ {
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
+
+ if (out_b < pad_batch_up || out_b >= (input_batch + pad_batch_up) ||
+ out_h < pad_height_top || out_h >= (input_height + pad_height_top) ||
+ out_w < pad_width_left || out_w >= (input_width + pad_width_left) ||
+ out_d < pad_depth_front || out_d >= (input_depth + pad_depth_front))
+ {
+ *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = zero_value;
+ }
+ else
+ {
+ auto input_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b - pad_batch_up, out_h - pad_height_top,
+ out_w - pad_width_left, out_d - pad_depth_front},
+ axises);
+ *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input_data->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+ }
+}
+void SimplePadLayer::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_output)->map(q);
+ CAST_CL(_padding_size)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+ _output->info()->tensor_shape(), _axises,
+ _input->info()->quantization_info().offset);
+ break;
+ case ::arm_compute::DataType::F32:
+ ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output,
+ _output->info()->tensor_shape(), _axises, 0.0f);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ CAST_CL(_padding_size)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
new file mode 100644
index 000000000..8cb6659ce
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_PAD_LAYER_H__
+#define __SIMPLE_PAD_LAYER_H__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimplePadLayer : public ::arm_compute::IFunction
+{
+public:
+ SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{}
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
+ ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+
+ void run(void) override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+ ::arm_compute::ITensor *_padding_size;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif // __SIMPLE_PAD_LAYER_H__
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc
new file mode 100644
index 000000000..b5b3a0950
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleSQRT.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSQRT::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void SimpleSQRT::run()
+{
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->map(queue);
+ CAST_CL(_output)->map(queue);
+ }
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape());
+
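+  // execute_window_loop invokes the lambda below once for every coordinate
+  // in the window, i.e. once per output element.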
+ execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+    // NOTE The input and output tensors must have the same data type.
+ assert(_input->info()->data_type() == _output->info()->data_type());
+
+    const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id));
+    *reinterpret_cast<float *>(_output->ptr_to_element(id)) = std::sqrt(input_value);
+ });
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ CAST_CL(_input)->unmap(queue);
+ CAST_CL(_output)->unmap(queue);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
new file mode 100644
index 000000000..b05a9e32e
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_SQRT_H__
+#define __SIMPLE_SQRT_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleSQRT : public ::arm_compute::IFunction
+{
+public:
+ SimpleSQRT(void) : _input(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_output;
+};
+
+#endif /*__SIMPLE_SQRT_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
new file mode 100644
index 000000000..f53675b99
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleSpaceToBatchND.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input,
+ ::arm_compute::ITensor *block_size,
+ ::arm_compute::ITensor *padding_size,
+ ::arm_compute::ITensor *output)
+{
+ const auto rank = input->info()->num_dimensions();
+ assert(rank == 4);
+
+ _input = input;
+ _block_size = block_size;
+ _padding_size = padding_size;
+ _output = output;
+}
+
+template <typename T>
+inline void
+SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape,
+ const ::arm_compute::ITensor *block_size, const ::arm_compute::ITensor *padding_size,
+ const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape,
+ T zero_value)
+{
+ const int input_batch = input_shape[3];
+ const int input_height = input_shape[1];
+ const int input_width = input_shape[0];
+
+ const int depth = output_shape[2];
+
+ const int padding_height_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 1}));
+ const int padding_height_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 1}));
+ const int padding_width_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 0}));
+ const int padding_width_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 0}));
+ const int padded_height = input_height + padding_height_left + padding_height_right;
+ const int padded_width = input_width + padding_width_left + padding_width_right;
+
+ const int block_size_height = *reinterpret_cast<int *>(block_size->ptr_to_element({1}));
+ const int block_size_width = *reinterpret_cast<int *>(block_size->ptr_to_element({0}));
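+  // NOTE padding_size uses x-first coordinates: the first index selects the
+  // before/after pair and the second selects the spatial axis (0 = width,
+  // 1 = height); block_size is likewise ordered {width, height}.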
+
+ assert(padding_height_left >= 0);
+ assert(padding_height_right >= 0);
+ assert(padding_width_left >= 0);
+ assert(padding_width_right >= 0);
+ assert(block_size_height >= 1);
+ assert(block_size_width >= 1);
+ assert(padded_height % block_size_height == 0);
+ assert(padded_width % block_size_width == 0);
+ assert(output->info()->dimension(3) ==
+ input->info()->dimension(3) * (block_size_height * block_size_width));
+
+ for (int in_b = 0; in_b < input_batch; ++in_b)
+ {
+ for (int in_d = 0; in_d < depth; ++in_d)
+ {
+ for (int in_h = 0; in_h < padded_height; ++in_h)
+ {
+ for (int in_w = 0; in_w < padded_width; ++in_w)
+ {
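+        // Scatter each padded spatial position into the batch dimension: the
+        // offset of (in_h, in_w) within its block selects which copy of the
+        // original batches receives this element.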
+ const int out_d = in_d;
+ const int out_h = in_h / block_size_height;
+ const int out_w = in_w / block_size_width;
+ const int out_b =
+ in_b +
+ ((in_h % block_size_height) * block_size_width + in_w % block_size_width) *
+ input_batch;
+
+ const ::arm_compute::Coordinates output_id{out_w, out_h, out_d, out_b};
+
+ if (in_h < padding_height_left || in_h >= (input_height + padding_height_left) ||
+ in_w < padding_width_left || in_w >= (input_width + padding_width_left))
+ {
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = zero_value;
+ }
+ else
+ {
+ const ::arm_compute::Coordinates input_id{in_w - padding_width_left,
+ in_h - padding_height_left, in_d, in_b};
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
+ }
+ }
+ }
+ }
+ }
+}
+void SimpleSpaceToBatchND::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_block_size)->map(q);
+ CAST_CL(_padding_size)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::U8:
+ case ::arm_compute::DataType::QASYMM8:
+ SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+ _output, _output->info()->tensor_shape(),
+ _input->info()->quantization_info().offset);
+ break;
+ case ::arm_compute::DataType::F32:
+ SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+ _output, _output->info()->tensor_shape(), 0.0f);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_block_size)->unmap(q);
+ CAST_CL(_padding_size)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
new file mode 100644
index 000000000..4af961d34
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SIMPLE_SPACE_TO_BATCHND_H__
+#define __SIMPLE_SPACE_TO_BATCHND_H__
+
+#include "internal/arm_compute.h"
+
+class SimpleSpaceToBatchND : public ::arm_compute::IFunction
+{
+public:
+ SimpleSpaceToBatchND(void)
+ : _input(nullptr), _block_size(nullptr), _padding_size(nullptr), _output(nullptr)
+ {
+ // DO NOTHING
+ }
+
+ /** Initialise input and output
+ *
+ * @param[in] input First tensor input.
+ * @param[in] block_size Block size.
+ * @param[in] padding_size Padding size.
+ * @param[out] output Output tensor.
+ */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *block_size,
+ ::arm_compute::ITensor *padding_size, ::arm_compute::ITensor *output);
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_block_size;
+ ::arm_compute::ITensor *_padding_size;
+ ::arm_compute::ITensor *_output;
+};
+
+#endif /*__SIMPLE_SPACE_TO_BATCHND_H__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
index 682295f81..3519da1f3 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
@@ -19,11 +19,8 @@
#include <arm_compute/runtime/CL/CLScheduler.h>
void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
- int32_t block_size,
- const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+ int32_t block_size, const ::arm_compute::Coordinates &axises)
{
- assert(input->info()->num_dimensions() == 4);
- assert(output->info()->num_dimensions() == 4);
const auto rank = axises.num_dimensions();
assert(rank == 4);
for (int i = 0; i < rank; ++i)
@@ -38,26 +35,10 @@ void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute:
_axises = axises;
}
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
- int32_t d, const ::arm_compute::Coordinates &axises)
-{
- // b, h, w, d >= 0
- size_t indexes[4];
- indexes[axises[0]] = b;
- indexes[axises[1]] = h;
- indexes[axises[2]] = w;
- indexes[axises[3]] = d;
-
- int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
- offset += indexes[2] * shape[1] * shape[0];
- offset += indexes[1] * shape[0];
- offset += indexes[0];
- return offset;
-}
-
template <typename T>
-inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape,
- int32_t block_size, T *output_data,
+inline void SpaceToDepth(const ::arm_compute::ITensor *input,
+ const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+ ::arm_compute::ITensor *output,
const ::arm_compute::TensorShape &output_shape,
const ::arm_compute::Coordinates &axises)
{
@@ -66,16 +47,6 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &
const int input_width = input_shape[axises[2]];
const int input_depth = input_shape[axises[3]];
- const int output_batch = output_shape[axises[0]];
- const int output_height = output_shape[axises[1]];
- const int output_width = output_shape[axises[2]];
- const int output_depth = output_shape[axises[3]];
-
- assert(input_batch == output_batch);
- assert(input_height == output_height * block_size);
- assert(input_width == output_width * block_size);
- assert(input_depth * block_size * block_size == output_depth);
-
for (int in_b = 0; in_b < input_batch; ++in_b)
{
for (int in_h = 0; in_h < input_height; ++in_h)
@@ -90,10 +61,13 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &
const int out_d =
in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
- const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
- const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
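+          // Address elements through ITensor coordinates rather than raw
+          // buffer offsets, so strided or padded tensor layouts are handled
+          // correctly by ptr_to_element.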
+ auto input_id =
+ asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
- output_data[output_index] = input_data[input_index];
+ *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+ *reinterpret_cast<T *>(input->ptr_to_element(input_id));
}
}
}
@@ -110,35 +84,16 @@ void SimpleSpaceToDepth::run()
CAST_CL(_output)->map(q);
}
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
switch (_input->info()->data_type())
{
case ::arm_compute::DataType::U8:
case ::arm_compute::DataType::QASYMM8:
- SpaceToDepth(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<uint8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
- break;
- case ::arm_compute::DataType::S8:
- SpaceToDepth(reinterpret_cast<const int8_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<int8_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
- break;
- case ::arm_compute::DataType::U32:
- SpaceToDepth(reinterpret_cast<const uint32_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<uint32_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
- break;
- case ::arm_compute::DataType::S32:
- SpaceToDepth(reinterpret_cast<const int32_t *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<int32_t *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
case ::arm_compute::DataType::F32:
- SpaceToDepth(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
- _block_size, reinterpret_cast<float *>(output_buf),
- _output->info()->tensor_shape(), _axises);
+ SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+ _output->info()->tensor_shape(), _axises);
break;
default:
ARM_COMPUTE_ERROR("DataType not supported");
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
index f5e028b1c..9e87c364c 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
@@ -14,25 +14,44 @@
* limitations under the License.
*/
+/**
+ * @file        SimpleSpaceToDepth.h
+ * @brief       This file contains SimpleSpaceToDepth class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __SIMPLE_SPACE_TO_DEPTH_H__
#define __SIMPLE_SPACE_TO_DEPTH_H__
#include "internal/arm_compute.h"
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "internal/arm_compute/Cast.h"
+/**
+ * @brief Class to run SimpleSpaceToDepth Layer
+ */
class SimpleSpaceToDepth : public ::arm_compute::IFunction
{
public:
- /** Initialise input and output
- *
- * @param[in] input First tensor input.
- * @param[out] output Output tensor.
- * @param[in] block_size Block size.
+ SimpleSpaceToDepth(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{}
+ {
+ // DO NOTHING
+ }
+
+ /**
+ * @brief Configure the layer
+ * @param[in] input First tensor input.
+   * @param[out] output      Output tensor.
+ * @param[in] block_size Block size.
+   * @param[in]  axises      Axis mapping of rank 4
+ * @return N/A
*/
void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
- const ::arm_compute::Coordinates &axises);
+ const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
+ /**
+ * @brief Run the operation. Must be called after configure().
+ * @return N/A
+ */
void run() override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
new file mode 100644
index 000000000..abc291289
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/layers/SimpleTransposeConv.h"
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleTransposeConv::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights,
+ ::arm_compute::ITensor *output,
+ ::arm_compute::PadStrideInfo &tconv_info,
+ ::arm_compute::Coordinates axises)
+{
+ auto rank = axises.num_dimensions();
+
+ assert(rank == 4);
+
+ _input = input;
+ _weights = weights;
+ _output = output;
+ _stride_width = tconv_info.stride().first;
+ _stride_height = tconv_info.stride().second;
+ _pad_width = tconv_info.pad_left();
+ _pad_height = tconv_info.pad_top();
+ _axises = axises;
+}
+
+template <typename T>
+inline void ApplyTransposeConv(
+ const ::arm_compute::TensorShape &input_shape, const ::arm_compute::ITensor *input_data,
+ const ::arm_compute::TensorShape &filter_shape, const ::arm_compute::ITensor *filter_data,
+ const ::arm_compute::TensorShape &output_shape, const ::arm_compute::ITensor *output_data,
+ const int32_t stride_width, const int32_t stride_height, const int32_t pad_width,
+ const int32_t pad_height, const ::arm_compute::Coordinates axises)
+{
+ const int batches = input_shape[axises[0]];
+ const int input_height = input_shape[axises[1]];
+ const int input_width = input_shape[axises[2]];
+ const int input_depth = input_shape[axises[3]];
+
+ const int filter_height = filter_shape[axises[1]];
+ const int filter_width = filter_shape[axises[2]];
+
+ const int output_height = output_shape[axises[1]];
+ const int output_width = output_shape[axises[2]];
+ const int output_depth = output_shape[axises[3]];
+
+ assert(batches == output_shape[axises[0]]);
+ assert(input_depth == filter_shape[axises[3]]);
+ assert(filter_shape[axises[0]] == output_depth);
+
+ // Although transpose convolution simplifies to convolution with transposed
+ // weights for strides of 1, non-unitary striding complicates matters. To
+ // keep this reference implementation as clear as possible, we use a
+ // "scatter" access pattern, where we loop through all the input elements,
+ // computing their influence on the output, rather than looping through the
+ // output elements in the typical "gather" access pattern of a conv. We
+ // therefore must initialize the output array to zero.
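+  //
+  // NOTE The loop below only accumulates into output_data; it assumes the
+  // output buffer has been zero-filled by the caller before run() is invoked.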
+
+ // Loop through input elements one at a time.
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ // Loop through the output elements it will influence
+ const int out_x_origin = (in_x * stride_width) - pad_width;
+ const int out_y_origin = (in_y * stride_height) - pad_height;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ for (int out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ // Compute output element location
+ const int out_x = out_x_origin + filter_x;
+ const int out_y = out_y_origin + filter_y;
+ // We cannot accumulate out of bounds
+ if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
+ (out_y < output_height))
+ {
+ auto input_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{batch, in_y, in_x, in_channel}, axises);
+ auto filter_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{in_channel, filter_y, filter_x, out_channel},
+ axises);
+ auto output_id = asARMComputeCoordinates(
+ ::arm_compute::Coordinates{batch, out_y, out_x, out_channel}, axises);
+ T input_value = *reinterpret_cast<T *>(input_data->ptr_to_element(input_id));
+ T filter_value = *reinterpret_cast<T *>(filter_data->ptr_to_element(filter_id));
+ *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) +=
+ input_value * filter_value;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void SimpleTransposeConv::run()
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->map(q);
+ CAST_CL(_weights)->map(q);
+ CAST_CL(_output)->map(q);
+ }
+
+ switch (_input->info()->data_type())
+ {
+ case ::arm_compute::DataType::S32:
+ ApplyTransposeConv<int32_t>(_input->info()->tensor_shape(), _input,
+ _weights->info()->tensor_shape(), _weights,
+ _output->info()->tensor_shape(), _output, _stride_width,
+ _stride_height, _pad_width, _pad_height, _axises);
+ break;
+ case ::arm_compute::DataType::F32:
+ ApplyTransposeConv<float>(_input->info()->tensor_shape(), _input,
+ _weights->info()->tensor_shape(), _weights,
+ _output->info()->tensor_shape(), _output, _stride_width,
+ _stride_height, _pad_width, _pad_height, _axises);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DataType not supported");
+ break;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ CAST_CL(_input)->unmap(q);
+ CAST_CL(_weights)->unmap(q);
+ CAST_CL(_output)->unmap(q);
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h
new file mode 100644
index 000000000..c5519828b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRANSPOSE_CONV_EX__
+#define __TRANSPOSE_CONV_EX__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleTransposeConv : public ::arm_compute::IFunction
+{
+public:
+ SimpleTransposeConv()
+ : _input(nullptr), _weights(nullptr), _output(nullptr), _stride_width(0), _stride_height(0),
+ _pad_width(0), _pad_height(0)
+ {
+ // DO NOTHING
+ }
+
+  /** Initialise input and output
+   *
+   * @param[in] input First tensor input.
+   * @param[in] weights Weights tensor.
+   * @param[out] output Output tensor.
+   * @param[in] tconv_info Padding and stride policies used in the deconvolution,
+   *                       as described in @ref PadStrideInfo.
+   * @param[in] axises Axis mapping of rank 4.
+   */
+ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights,
+ ::arm_compute::ITensor *output, ::arm_compute::PadStrideInfo &tconv_info,
+ ::arm_compute::Coordinates axises = getARMComputeAxises(4));
+
+ void run() override;
+
+private:
+ ::arm_compute::ITensor *_input;
+ ::arm_compute::ITensor *_weights;
+ ::arm_compute::ITensor *_output;
+ int32_t _stride_width;
+ int32_t _stride_height;
+ int32_t _pad_width;
+ int32_t _pad_height;
+ ::arm_compute::Coordinates _axises;
+};
+
+#endif /*__TRANSPOSE_CONV_EX__ */
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc
new file mode 100644
index 000000000..910595a44
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/arm_compute.h"
+#include "SimpleUnpackLayer.h"
+
+void SimpleUnpackLayer::configure(::arm_compute::ICLTensor *input,
+ const std::vector<::arm_compute::ICLTensor *> &output_vector,
+ int32_t axis)
+{
+ uint32_t nr_outputs = output_vector.size();
+ _cl_permuted_vector.resize(nr_outputs);
+ _cl_permute_vector.resize(nr_outputs);
+ uint32_t input_rank = input->info()->num_dimensions();
+ const ::arm_compute::PermutationVector pv{2, 0, 1};
+ _input = input;
+  // A negative axis is supported: axis = -1 implies axis = R - 1, where R is the input rank.
+ if (axis < 0)
+ {
+ axis += input_rank;
+ }
+ _axis = ToARMComputeAxis(input_rank, axis).value();
+ _cl_reshape_vector.resize(nr_outputs);
+
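+  // Build the shape of a single sub-tensor: the full input extent on every
+  // dimension except the unpack axis, where the extent is 1 (one slice per
+  // output).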
+ ::arm_compute::TensorShape subTensor_shape{};
+ for (int i = 0; i < input_rank; i++)
+ {
+ if (i != _axis)
+ {
+ subTensor_shape.set(i, _input->info()->tensor_shape()[i]);
+ }
+ else
+ {
+ subTensor_shape.set(i, 1);
+ }
+ }
+
+ auto subTensor_offset = ::arm_compute::Coordinates{};
+ subTensor_offset.set_num_dimensions(input_rank);
+
+ for (int i = 0; i < output_vector.size(); i++)
+ {
+ _output_vector.push_back(output_vector[i]);
+ subTensor_offset[_axis] = i;
+ auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>(
+ CAST_CL(_input), subTensor_shape, subTensor_offset, true);
+ _sub_tensor_vector.push_back(temp_tensor);
+    // Permute and reshape each sub-tensor slice into the corresponding output tensor.
+ _cl_permute_vector[i].configure(_sub_tensor_vector[i].get(), &_cl_permuted_vector[i], pv);
+ _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], CAST_CL(_output_vector[i]));
+ _cl_permuted_vector[i].allocator()->allocate();
+ }
+}
+
+void SimpleUnpackLayer::run(void)
+{
+ for (int i = 0; i < _output_vector.size(); i++)
+ {
+ _cl_permute_vector[i].run();
+ _cl_reshape_vector[i].run();
+ }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h
new file mode 100644
index 000000000..52fc7513d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __UNPACK_LAYER_H__
+#define __UNPACK_LAYER_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+class SimpleUnpackLayer : public ::arm_compute::IFunction
+{
+public:
+ SimpleUnpackLayer(void)
+ : _cl_permuted_vector{}, _output_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{},
+ _cl_permute_vector{}, _input(nullptr), _axis(0)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void configure(::arm_compute::ICLTensor *input,
+ const std::vector<::arm_compute::ICLTensor *> &output_vector, int32_t axis);
+
+public:
+ void run(void) override;
+
+private:
+ std::vector<::arm_compute::CLTensor> _cl_permuted_vector;
+ std::vector<::arm_compute::ICLTensor *> _output_vector;
+ std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector;
+ std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector;
+ std::vector<::arm_compute::CLPermute> _cl_permute_vector;
+ ::arm_compute::ICLTensor *_input;
+ int32_t _axis;
+};
+
+#endif // __UNPACK_LAYER_H__
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc
deleted file mode 100644
index 3f988a819..000000000
--- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "SquaredDifferenceOperation.h"
-#include "internal/arm_compute.h"
-
-void SquaredDifferenceOperation::configure(::arm_compute::ITensor *input1,
- ::arm_compute::ITensor *input2,
- ::arm_compute::ITensor *output,
- ::arm_compute::ConvertPolicy ConvertPolicy, float scale,
- ::arm_compute::RoundingPolicy RoundingPolicy)
-{
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- if (::internal::arm_compute::isGpuMode())
- {
- _cl_sub.configure(CAST_CL(input1), CAST_CL(input2), CAST_CL(output), ConvertPolicy);
- _cl_mul.configure(CAST_CL(output), CAST_CL(output), CAST_CL(output), scale, ConvertPolicy,
- RoundingPolicy);
- }
- else
- {
- _neon_sub.configure(CAST_NE(input1), CAST_NE(input2), CAST_NE(output), ConvertPolicy);
- _neon_mul.configure(CAST_NE(output), CAST_NE(output), CAST_NE(output), scale, ConvertPolicy,
- RoundingPolicy);
- }
-}
-
-void SquaredDifferenceOperation::run(void)
-{
- if (::internal::arm_compute::isGpuMode())
- {
- _cl_sub.run();
- _cl_mul.run();
- }
- else
- {
- _neon_sub.run();
- _neon_mul.run();
- }
-}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h
deleted file mode 100644
index 3782c4e8c..000000000
--- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef __SQUARED_DIFFERENCE_OPERATION_H__
-#define __SQUARED_DIFFERENCE_OPERATION_H__
-
-#include <arm_compute/runtime/Tensor.h>
-#include <arm_compute/runtime/CL/CLTensor.h>
-
-#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h>
-#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h>
-#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
-#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
-
-class SquaredDifferenceOperation : public ::arm_compute::IFunction
-{
-public:
- void configure(::arm_compute::ITensor *input1, ::arm_compute::ITensor *input2,
- ::arm_compute::ITensor *output, ::arm_compute::ConvertPolicy ConvertPolicy,
- float scale, ::arm_compute::RoundingPolicy RoundingPolicy);
-
-public:
- void run(void) override;
-
-private:
- ::arm_compute::ITensor *_input1;
- ::arm_compute::ITensor *_input2;
-
- ::arm_compute::ITensor *_output;
-
-private:
- ::arm_compute::CLArithmeticSubtraction _cl_sub;
- ::arm_compute::CLPixelWiseMultiplication _cl_mul;
-
- ::arm_compute::NEArithmeticSubtraction _neon_sub;
- ::arm_compute::NEPixelWiseMultiplication _neon_mul;
-};
-#endif // __SQUARED_DIFFERENCE_OPERATION_H__
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h
index 764b9b13a..ac25692a1 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h
@@ -14,12 +14,17 @@
* limitations under the License.
*/
+/**
+ * @file Reader.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::feature::Reader
+ */
#ifndef __INTERNAL_NNAPI_FEATURE_READER_H__
#define __INTERNAL_NNAPI_FEATURE_READER_H__
#include "internal/nnapi/feature/Utils.h"
-#include "util/feature/Reader.h"
+#include "misc/feature/Reader.h"
namespace internal
{
@@ -28,20 +33,40 @@ namespace nnapi
namespace feature
{
-template <typename T> class Reader final : public nnfw::util::feature::Reader<T>
+/**
+ * @brief Class to support reading element in feature(3D, 4D)
+ */
+template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
{
public:
+ /**
+ * @brief Construct a new Reader object
+ * @param[in] shape Shape of feature
+ * @param[in] ptr Pointer to feature data
+ * @param[in] len Size of tensor (byte)
+ */
// NOTE The parameter len denotes the number of bytes.
- Reader(const ::nnfw::util::feature::Shape &shape, const T *ptr, size_t len)
+ Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
: _shape{shape}, _ptr{ptr}
{
assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
}
public:
- const nnfw::util::feature::Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get shape of feature
+ * @return Shape of feature
+ */
+ const nnfw::misc::feature::Shape &shape(void) const { return _shape; }
public:
+ /**
+ * @brief Get value of element using channel, row, and column index for 3D feature
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t ch, uint32_t row, uint32_t col) const override
{
uint32_t index = index_of(_shape, ch, row, col);
@@ -51,6 +76,14 @@ public:
return arr[index];
}
+ /**
+ * @brief Get value of element using batch, channel, row, and column index for 4D feature
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
{
uint32_t index = index_of(_shape, batch, ch, row, col);
@@ -59,7 +92,7 @@ public:
}
private:
- nnfw::util::feature::Shape _shape;
+ nnfw::misc::feature::Shape _shape;
private:
const T *_ptr;
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h
index a64ff5d63..ee59d217e 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h
@@ -14,10 +14,15 @@
* limitations under the License.
*/
+/**
+ * @file Utils.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines utility functions used in internal::nnapi::feature namespace
+ */
#ifndef __INTERNAL_NNAPI_FEATURE_UTILS_H__
#define __INTERNAL_NNAPI_FEATURE_UTILS_H__
-#include "util/feature/Shape.h"
+#include "misc/feature/Shape.h"
namespace internal
{
@@ -26,7 +31,15 @@ namespace nnapi
namespace feature
{
-inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t ch, uint32_t row,
+/**
+ * @brief Get position of element using channel, row, and column for 3D feature
+ * @param[in] shape Shape of feature
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Position of element
+ */
+inline uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t ch, uint32_t row,
uint32_t col)
{
uint32_t res = 0;
@@ -39,7 +52,16 @@ inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t ch,
return res;
}
-inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t batch, uint32_t ch,
+/**
+ * @brief Get position of element using batch, channel, row, and column for 4D feature
+ * @param[in] shape Shape of feature
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Position of element
+ */
+inline uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t batch, uint32_t ch,
uint32_t row, uint32_t col)
{
uint32_t res = 0;
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h
index 083b6b055..965e42f1c 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h
@@ -14,12 +14,17 @@
* limitations under the License.
*/
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::feature::View class
+ */
#ifndef __INTERNAL_NNAPI_FEATURE_VIEW_H__
#define __INTERNAL_NNAPI_FEATURE_VIEW_H__
#include "internal/nnapi/feature/Utils.h"
-#include "util/feature/Reader.h"
+#include "misc/feature/Reader.h"
namespace internal
{
@@ -28,25 +33,55 @@ namespace nnapi
namespace feature
{
-template <typename T> class View final : public nnfw::util::feature::Reader<T>
+/**
+ * @brief Class to access feature's element information using index
+ */
+template <typename T> class View final : public nnfw::misc::feature::Reader<T>
{
public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] shape Shape of feature
+ * @param[in] ptr Pointer to feature data
+ * @param[in] len Size of feature (byte)
+ */
// NOTE The parameter len denotes the number of bytes.
- View(const ::nnfw::util::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr}
+ View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr}
{
assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
}
public:
- const nnfw::util::feature::Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get shape of feature
+ * @return Shape of feature
+ */
+ const nnfw::misc::feature::Shape &shape(void) const { return _shape; }
public:
+ /**
+ * @brief Get value of element in 3D feature using channel, row, and column index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t ch, uint32_t row, uint32_t col) const override
{
uint32_t index = index_of(_shape, ch, row, col);
return _ptr[index];
}
+
+ /**
+ * @brief Get value of element in 4D feature using batch, channel, row and column index
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
{
uint32_t index = index_of(_shape, batch, ch, row, col);
@@ -54,12 +89,28 @@ public:
return _ptr[index];
}
+ /**
+ * @brief Get reference of element in 3D feature using channel, row, and column index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
T &at(uint32_t ch, uint32_t row, uint32_t col)
{
uint32_t index = index_of(_shape, ch, row, col);
return _ptr[index];
}
+
+ /**
+ * @brief Get reference of element in 4D feature using batch, channel, row and column index
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
{
uint32_t index = index_of(_shape, batch, ch, row, col);
@@ -68,7 +119,7 @@ public:
}
private:
- nnfw::util::feature::Shape _shape;
+ nnfw::misc::feature::Shape _shape;
private:
T *_ptr;
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h
index 0853a8c89..ae964f74c 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+/**
+ * @file Reader.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::kernel::Reader class
+ */
#ifndef __INTERNAL_NNAPI_KERNEL_READER_H__
#define __INTERNAL_NNAPI_KERNEL_READER_H__
-#include "util/kernel/Shape.h"
-#include "util/kernel/Reader.h"
+#include "misc/kernel/Shape.h"
+#include "misc/kernel/Reader.h"
namespace internal
{
@@ -27,20 +32,41 @@ namespace nnapi
namespace kernel
{
-template <typename T> class Reader final : public nnfw::util::kernel::Reader<T>
+/**
+ * @brief Class to support reading element in kernel
+ */
+template <typename T> class Reader final : public nnfw::misc::kernel::Reader<T>
{
public:
+ /**
+ * @brief Construct a new Reader object
+ * @param[in] shape Shape of kernel
+ * @param[in] ptr Pointer to kernel data
+ * @param[in] len Size of kernel (byte)
+ */
// NOTE The parameter len denotes the number of bytes.
- Reader(const ::nnfw::util::kernel::Shape &shape, const T *ptr, size_t len)
+ Reader(const ::nnfw::misc::kernel::Shape &shape, const T *ptr, size_t len)
: _shape{shape}, _ptr{ptr}
{
assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
}
public:
- const nnfw::util::kernel::Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get shape of kernel
+ * @return Shape of kernel
+ */
+ const nnfw::misc::kernel::Shape &shape(void) const { return _shape; }
public:
+ /**
+ * @brief Get value of element for kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
{
// NNAPI uses NHWC ordering
@@ -55,7 +81,7 @@ public:
}
private:
- nnfw::util::kernel::Shape _shape;
+ nnfw::misc::kernel::Shape _shape;
private:
const T *_ptr;
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h
index f6f0f3908..f03a4be31 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+/**
+ * @file Reader.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::matrix::Reader class
+ */
#ifndef __INTERNAL_NNAPI_MATRIX_READER_H__
#define __INTERNAL_NNAPI_MATRIX_READER_H__
-#include "util/matrix/Shape.h"
-#include "util/matrix/Reader.h"
+#include "misc/matrix/Shape.h"
+#include "misc/matrix/Reader.h"
namespace internal
{
@@ -27,20 +32,39 @@ namespace nnapi
namespace matrix
{
-template <typename T> class Reader final : public nnfw::util::matrix::Reader<T>
+/**
+ * @brief Class to support reading element in matrix
+ */
+template <typename T> class Reader final : public nnfw::misc::matrix::Reader<T>
{
public:
+ /**
+ * @brief Construct a new Reader object
+ * @param[in] shape Shape of matrix
+ * @param[in] ptr Pointer to matrix data
+ * @param[in] len Size of matrix (byte)
+ */
// NOTE The parameter len denotes the number of bytes.
- Reader(const ::nnfw::util::matrix::Shape &shape, const T *ptr, size_t len)
+ Reader(const ::nnfw::misc::matrix::Shape &shape, const T *ptr, size_t len)
: _shape{shape}, _ptr{ptr}
{
assert(shape.H * shape.W * sizeof(T) == len);
}
public:
- const nnfw::util::matrix::Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get shape of matrix
+ * @return Shape of matrix
+ */
+ const nnfw::misc::matrix::Shape &shape(void) const { return _shape; }
public:
+ /**
+ * @brief Get value of element for matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
T at(uint32_t row, uint32_t col) const override
{
// NNAPI uses NHWC ordering
@@ -53,7 +77,7 @@ public:
}
private:
- nnfw::util::matrix::Shape _shape;
+ nnfw::misc::matrix::Shape _shape;
private:
const T *_ptr;
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h
index 38d1b291b..6a3fff646 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file        ConstView.h
+ * @brief       This file contains ConstView class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__
#define __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__
@@ -27,21 +33,36 @@ namespace nnapi
namespace tensor
{
+/**
+ * @brief Wrapper class to read tensor values
+ * @tparam T The tensor element type
+ */
template <typename T> class ConstView
{
public:
- ConstView(const ::nnfw::util::tensor::Shape &shape, const uint8_t *ptr, size_t len)
+ /**
+ * @brief Construct a ConstView class
+ * @param[in] shape Tensor shape
+ * @param[in] ptr The base pointer of actual data
+ * @param[in] len The number of bytes
+ */
+ ConstView(const ::nnfw::misc::tensor::Shape &shape, const uint8_t *ptr, size_t len)
: _shape{shape}, _ptr{ptr}, _len{len}
{
// DO NOTHING
}
public:
- const nnfw::util::tensor::Shape &shape(void) const { return _shape; }
+ const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
private:
// TODO Make this as a helper function, and share it for both View<T> and ConstView<T>
- uint32_t offset_of(const nnfw::util::tensor::Index &index) const
+ /**
+ * @brief Calculate offset for the given tensor index
+ * @param[in] index Tensor index
+ * @return The calculated offset
+ */
+ uint32_t offset_of(const nnfw::misc::tensor::Index &index) const
{
if (_shape.rank() == 0)
{
@@ -61,7 +82,12 @@ private:
}
public:
- T at(const nnfw::util::tensor::Index &index) const
+ /**
+ * @brief Get the value on the given index
+ * @param[in] index Flattened tensor index
+ * @return The value on the given index
+ */
+ T at(const nnfw::misc::tensor::Index &index) const
{
const auto offset = offset_of(index);
@@ -71,7 +97,7 @@ public:
}
private:
- const nnfw::util::tensor::Shape _shape;
+ const nnfw::misc::tensor::Shape _shape;
private:
const uint8_t *const _ptr;
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h
index fe89e572e..cc51db594 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h
@@ -14,11 +14,17 @@
* limitations under the License.
*/
+/**
+ * @file        Reader.h
+ * @brief       This file contains Reader class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_NNAPI_TENSOR_READER_H__
#define __INTERNAL_NNAPI_TENSOR_READER_H__
#include <vector>
-#include "util/tensor/Reader.h"
+#include "misc/tensor/Reader.h"
namespace internal
{
@@ -27,11 +33,20 @@ namespace nnapi
namespace tensor
{
-template <typename T> class Reader final : public nnfw::util::tensor::Reader<T>
+/**
+ * @brief Wrapper class to read tensor values
+ * @tparam T The tensor element type
+ */
+template <typename T> class Reader final : public nnfw::misc::tensor::Reader<T>
{
public:
- // NOTE The parameter len denotes the number of bytes.
- Reader(const ::nnfw::util::tensor::Shape &shape, const T *ptr, size_t len)
+ /**
+ * @brief Construct a Reader class
+ * @param[in] shape Tensor shape
+ * @param[in] ptr The base pointer of actual data
+ * @param[in] len The number of bytes
+ */
+ Reader(const ::nnfw::misc::tensor::Shape &shape, const T *ptr, size_t len)
: _shape{shape}, _ptr{ptr}
{
assert(shape.element_nums() * sizeof(T) == len);
@@ -39,10 +54,19 @@ public:
}
public:
- const nnfw::util::tensor::Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get shape object
+ * @return The shape as const reference
+ */
+ const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
public:
- T at(const nnfw::util::tensor::Index &index_nnapi) const override
+ /**
+ * @brief Get the value on the given index
+ * @param[in] index_nnapi Flattened tensor index
+ * @return The value on the given index
+ */
+ T at(const nnfw::misc::tensor::Index &index_nnapi) const override
{
uint32_t offset = 0;
@@ -53,17 +77,19 @@ public:
}
private:
- /*
- Assuming that shape is [d4, .. , d1] and data is stored at a pointer ptr,
- we need to calculate the offset of index [i4, .. i1] as follows:
- offset = i4 * (d3 * d2 * d1) +
- i3 * (d2 * d1) +
- i2 * (d1) +
- i1
- So (d4 * d3 * d2 * d1) or (d3 * d2 * d1) or (d2 * d1) happens whenever offset is calculate.
- To minimize this repetitive calculation,
- _stridess[n] contains _spape[n-1]*_spape[n-2]*_spape[0]
- */
+ /**
+ * @brief Initializes @c _stridess
+ * @return N/A
+ * @note Assuming that shape is [d4, .. , d1] and data is stored at a pointer ptr,
+ we need to calculate the offset of index [i4, .. i1] as follows:
+ offset = i4 * (d3 * d2 * d1) +
+ i3 * (d2 * d1) +
+ i2 * (d1) +
+ i1
+             So (d4 * d3 * d2 * d1) or (d3 * d2 * d1) or (d2 * d1) is recomputed whenever an
+             offset is calculated. To minimize this repetitive calculation,
+             _stridess[n] contains _shape[n-1] * _shape[n-2] * ... * _shape[0]
+ */
void initialize(void)
{
for (int r = 0; r < _shape.rank(); r++)
@@ -76,7 +102,7 @@ private:
}
private:
- nnfw::util::tensor::Shape _shape;
+ nnfw::misc::tensor::Shape _shape;
private:
const T *_ptr;
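
The stride table described in the @note above can be checked with a few lines. A hedged sketch of the same precomputation (standalone; the shape values are invented for the example):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main()
    {
      // Shape [d4, d3, d2, d1] = [2, 3, 4, 5], row-major storage.
      const std::vector<uint32_t> shape{2, 3, 4, 5};
      std::vector<uint32_t> stride(shape.size());

      // stride[r] = product of all dimensions to the right of r.
      uint32_t acc = 1;
      for (int r = shape.size() - 1; r >= 0; --r)
      {
        stride[r] = acc;
        acc *= shape[r];
      }

      // offset of index [1, 2, 3, 4] = sum of index[r] * stride[r]
      const uint32_t index[] = {1, 2, 3, 4};
      uint32_t offset = 0;
      for (size_t r = 0; r < shape.size(); ++r)
        offset += index[r] * stride[r];

      assert(offset == 1 * 60 + 2 * 20 + 3 * 5 + 4); // 119, the last element
      return 0;
    }
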
diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h
index 80e1bb057..f8f297f97 100644
--- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h
+++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h
@@ -14,11 +14,16 @@
* limitations under the License.
*/
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::tensor::View class
+ */
#ifndef __INTERNAL_NNAPI_TENSOR_VIEW_H__
#define __INTERNAL_NNAPI_TENSOR_VIEW_H__
-#include "util/tensor/Shape.h"
-#include "util/tensor/Index.h"
+#include "misc/tensor/Shape.h"
+#include "misc/tensor/Index.h"
namespace internal
{
@@ -27,20 +32,38 @@ namespace nnapi
namespace tensor
{
+/**
+ * @brief Class to access tensor's element information using index
+ */
template <typename T> class View
{
public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] shape Shape of tensor
+ * @param[in] ptr Pointer to tensor data
+ * @param[in] len Size of tensor (byte)
+ */
// NOTE The parameter len denotes the number of bytes.
- View(const ::nnfw::util::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr}
+ View(const ::nnfw::misc::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr}
{
assert(shape.element_nums() * sizeof(T) == len);
}
public:
- const nnfw::util::tensor::Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get shape of tensor
+ * @return Shape of tensor
+ */
+ const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
private:
- uint32_t offset_of(const nnfw::util::tensor::Index &index) const
+ /**
+ * @brief Get position of element using index in tensor
+ * @param[in] index Index of element
+ * @return Position of element
+ */
+ uint32_t offset_of(const nnfw::misc::tensor::Index &index) const
{
if (_shape.rank() == 0)
{
@@ -60,14 +83,24 @@ private:
}
public:
- T at(const nnfw::util::tensor::Index &index) const
+ /**
+ * @brief Get value of element at index
+ * @param[in] index Index of element
+ * @return Value of element at index
+ */
+ T at(const nnfw::misc::tensor::Index &index) const
{
const auto offset = offset_of(index);
return _ptr[offset];
}
- T &at(const nnfw::util::tensor::Index &index)
+ /**
+ * @brief Get reference of element at index
+ * @param[in] index Index of element
+ * @return Reference of element at index
+ */
+ T &at(const nnfw::misc::tensor::Index &index)
{
const auto offset = offset_of(index);
@@ -75,7 +108,7 @@ public:
}
private:
- nnfw::util::tensor::Shape _shape;
+ nnfw::misc::tensor::Shape _shape;
private:
T *_ptr;
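
Note the pair of at() overloads documented above: the const one reads, the non-const one returns a reference, which is what makes View writable (Reader and ConstView have no such overload). A toy analogue of the pattern (not this runtime's class):

    #include <cassert>
    #include <vector>

    // Toy view with const and non-const accessors, mirroring the pattern above.
    template <typename T> class MiniView
    {
    public:
      MiniView(std::vector<T> &data) : _data(data) {}
      T at(size_t i) const { return _data[i]; } // read
      T &at(size_t i) { return _data[i]; }      // write
    private:
      std::vector<T> &_data;
    };

    int main()
    {
      std::vector<float> buf(6, 0.0f);
      MiniView<float> view{buf};
      view.at(4) = 0.5f;          // non-const overload: writes through the view
      assert(view.at(4) == 0.5f); // const overload is picked on a const view
      return 0;
    }
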
diff --git a/runtimes/pure_arm_compute/src/internal/op/Abs.cc b/runtimes/pure_arm_compute/src/internal/op/Abs.cc
new file mode 100644
index 000000000..e23a9538c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Abs.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Abs.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Abs
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Abs
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Abs
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+ input_index = inputs[0];
+}
+
+} // namespace Abs
+} // namespace op
+} // namespace tflite
+} // namespace internal
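
The one-line accept() above is the double-dispatch half of the visitor pattern: the node knows its concrete type and hands itself to the matching visit() overload. A self-contained sketch of the mechanism (names invented; the runtime's real visitor interface lives in internal/op/NodeVisitor.h):

    #include <iostream>

    struct AbsNode;

    struct Visitor
    {
      virtual ~Visitor() = default;
      virtual void visit(const AbsNode &) = 0;
    };

    struct AbsNode
    {
      // Same shape as Node::accept above: forward to the typed overload.
      void accept(Visitor &&v) const { v.visit(*this); }
    };

    struct Printer : Visitor
    {
      void visit(const AbsNode &) override { std::cout << "Abs\n"; }
    };

    int main()
    {
      AbsNode node;
      node.accept(Printer{}); // dispatches to Printer::visit(const AbsNode &)
      return 0;
    }
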
diff --git a/runtimes/pure_arm_compute/src/internal/op/Abs.h b/runtimes/pure_arm_compute/src/internal/op/Abs.h
new file mode 100644
index 000000000..0be8b0205
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Abs.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_ABS_H__
+#define __INTERNAL_OP_ABS_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Abs
+{
+
+struct Param
+{
+ int32_t output_index;
+ int32_t input_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Abs
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_ABS_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Add.h b/runtimes/pure_arm_compute/src/internal/op/Add.h
index 42ed5b976..a7804a569 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Add.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Add.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Add.h
+ * @brief This file contains accept function and params for Add operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_ADD_H__
#define __INTERNAL_OP_ADD_H__
@@ -30,33 +36,66 @@ namespace op
namespace Add
{
+/**
+ * @brief Struct of Add operation's param
+ */
struct Param
{
- int32_t ofm_index;
+  int32_t ofm_index; /**< Output feature map index */
- int32_t lhs_index;
- int32_t rhs_index;
- int32_t activation_index;
+ int32_t lhs_index; /**< Left hand side index */
+ int32_t rhs_index; /**< Right hand side index */
+ int32_t activation_index; /**< Activation index */
+ /**
+ * @brief Construct a new Param object for Add as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Add with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Add
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for Add with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for Add
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for Add
+ * @return Parameters of Add
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for Add
+ * @param [in] v Node visitor for invoking visit function of Add
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
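
For context on activation_index above: in NNAPI models the operand it points at holds a fuse code that the backend applies to the operation's output. A hedged sketch of that convention (the constant values follow NeuralNetworks.h; the helper itself is illustrative):

    #include <algorithm>
    #include <cassert>

    // NNAPI fuse codes (values as in NeuralNetworks.h).
    enum FuseCode { FUSED_NONE = 0, FUSED_RELU = 1, FUSED_RELU1 = 2, FUSED_RELU6 = 3 };

    // Illustrative helper: apply a fused activation to one output element.
    float apply_activation(int code, float x)
    {
      switch (code)
      {
        case FUSED_RELU:  return std::max(0.0f, x);
        case FUSED_RELU1: return std::min(std::max(-1.0f, x), 1.0f);
        case FUSED_RELU6: return std::min(std::max(0.0f, x), 6.0f);
        default:          return x; // FUSED_NONE
      }
    }

    int main()
    {
      assert(apply_activation(FUSED_RELU6, 7.5f) == 6.0f);
      assert(apply_activation(FUSED_NONE, -2.0f) == -2.0f);
      return 0;
    }
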
diff --git a/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc b/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc
new file mode 100644
index 000000000..485430377
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/op/ArgMax.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ArgMax
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ArgMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ArgMax
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
+
+} // namespace ArgMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
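
The Param constructor above only records operand indexes; it never touches tensor data. A standalone analogue showing the unpacking (index values invented):

    #include <cassert>
    #include <cstdint>

    // Standalone analogue of the ArgMax Param constructor (illustrative).
    struct Param
    {
      int32_t ofm_index;
      int32_t ifm_index;
      int32_t axis_index;

      Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
            const uint32_t *outputs)
      {
        assert(inputCount == 2 && outputCount == 1);
        ofm_index = outputs[0];
        ifm_index = inputs[0];  // 0 -> Input Tensor Index
        axis_index = inputs[1]; // 1 -> Axis Tensor Index
      }
    };

    int main()
    {
      const uint32_t in[] = {4, 9}; // made-up operand indexes
      const uint32_t out[] = {11};
      Param p(2, in, 1, out);
      assert(p.ofm_index == 11 && p.ifm_index == 4 && p.axis_index == 9);
      return 0;
    }
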
diff --git a/runtimes/pure_arm_compute/src/internal/op/ArgMax.h b/runtimes/pure_arm_compute/src/internal/op/ArgMax.h
new file mode 100644
index 000000000..780af2232
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/ArgMax.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_ARGMAX_H__
+#define __INTERNAL_OP_ARGMAX_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ArgMax
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+ int32_t axis_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ArgMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_ARGMAX_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h b/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h
index 729f6043c..cf9061ca9 100644
--- a/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file AvgPool2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::AvgPool2D Param structs
+ * and internal::tflite::op::AvgPool2D Node classes
+ */
#ifndef __INTERNAL_OP_AVG_POOL_2D_H__
#define __INTERNAL_OP_AVG_POOL_2D_H__
@@ -32,44 +38,75 @@ namespace AvgPool2D
namespace Explicit
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; /**< Index of output feature map */
- int32_t ifm_index;
+ int32_t ifm_index; /**< Index of input feature map */
- int32_t kw_index;
- int32_t kh_index;
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
-
- int32_t activation_index;
+ int32_t padding_left_index; /**< Index of padding left */
+ int32_t padding_right_index; /**< Index of padding right */
+ int32_t padding_top_index; /**< Index of padding top */
+ int32_t padding_bottom_index; /**< Index of padding bottom */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
@@ -81,40 +118,71 @@ private:
namespace Implicit
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
+ int32_t ofm_index; /**< Index of output feature map */
- int32_t kw_index;
- int32_t kh_index;
+ int32_t ifm_index; /**< Index of input feature map */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
- int32_t padding_index;
- int32_t activation_index;
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+ int32_t padding_index; /**< Index of padding */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
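
On the Explicit/Implicit split documented above: the Explicit variant carries four padding indexes, while the Implicit variant carries a single operand selecting a padding scheme (SAME or VALID) from which explicit paddings are later derived. A hedged sketch of the usual SAME-padding arithmetic for one spatial dimension (the standard TF/NNAPI rule, not this runtime's code):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // SAME padding for one spatial dimension: output keeps ceil(in/stride)
    // positions, and the shortfall is split between head and tail.
    void same_padding(int32_t in, int32_t stride, int32_t k,
                      int32_t &head, int32_t &tail)
    {
      const int32_t out = (in + stride - 1) / stride; // ceil(in / stride)
      const int32_t total = std::max(0, (out - 1) * stride + k - in);
      head = total / 2;
      tail = total - head;
    }

    int main()
    {
      int32_t head = 0, tail = 0;
      same_padding(/*in=*/7, /*stride=*/2, /*k=*/3, head, tail);
      // out = 4, total = 3 * 2 + 3 - 7 = 2 -> pad 1 on each side
      assert(head == 1 && tail == 1);
      return 0;
    }
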
diff --git a/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
new file mode 100644
index 000000000..0768039d0
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/op/BatchToSpaceNd.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+}
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
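
For orientation, BatchToSpaceND moves block-size factors from the batch dimension into the spatial dimensions. A worked shape check under standard TFLite semantics (illustrative; crops are assumed absent):

    #include <cassert>
    #include <cstdint>

    int main()
    {
      // Shape rule (no crops):
      //   N_out = N_in / (bH * bW), H_out = H_in * bH, W_out = W_in * bW.
      const int32_t N = 4, H = 2, W = 2, C = 1; // input shape (NHWC)
      const int32_t bH = 2, bW = 2;             // block size

      assert(N % (bH * bW) == 0); // batch must be divisible by the block area
      assert(N / (bH * bW) == 1 && H * bH == 4 && W * bW == 4 && C == 1);
      // i.e. [4, 2, 2, 1] with block [2, 2] -> [1, 4, 4, 1]
      return 0;
    }
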
diff --git a/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h
new file mode 100644
index 000000000..a514cb44c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_BATCHTOSPACE_ND_H__
+#define __INTERNAL_OP_BATCHTOSPACE_ND_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+ int32_t block_size_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_BATCHTOSPACE_ND_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Cast.h b/runtimes/pure_arm_compute/src/internal/op/Cast.h
index 3b3795189..8af741a16 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Cast.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Cast.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Cast.h
+ * @brief This file contains accept function and params for Cast operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_CAST_H__
#define __INTERNAL_OP_CAST_H__
@@ -30,31 +36,64 @@ namespace op
namespace Cast
{
+/**
+ * @brief Struct of Cast operation's param
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; /**< Output index */
- int32_t input_index;
+ int32_t input_index; /**< Input index */
+ /**
+ * @brief Construct a new Param object for Cast as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Cast with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Cast
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for Cast with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for Cast
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for Cast
+ * @return Parameters of Cast
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for Cast
+ * @param [in] v Node visitor for invoking visit function of Cast
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Concat.h b/runtimes/pure_arm_compute/src/internal/op/Concat.h
index 185cba3e1..207f964fb 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Concat.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Concat.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Concat.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Concat node
+ */
+
#ifndef __INTERNAL_OP_CONCAT_H__
#define __INTERNAL_OP_CONCAT_H__
@@ -31,36 +37,68 @@ namespace op
namespace Concat
{
+/**
+ * @brief Struct to manipulate parameter for Concat operation
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; //!< index for output
- std::vector<int32_t> ifm_indexes;
- int32_t axis_index;
+  std::vector<int32_t> ifm_indexes; //!< indexes for inputs
+ int32_t axis_index; //!< index for axis
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+   * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define Concat Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Concat Node object
+ * @param param Parameter for Concat Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for Concat node
};
} // namespace Concat
diff --git a/runtimes/pure_arm_compute/src/internal/op/Conv2D.h b/runtimes/pure_arm_compute/src/internal/op/Conv2D.h
index b04b8c85f..de46fbb9c 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Conv2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Conv2D.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Conv2D.h
+ * @brief This file contains accept function and params for Conv2D operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_CONV_2D_H__
#define __INTERNAL_OP_CONV_2D_H__
@@ -32,43 +38,76 @@ namespace Conv2D
namespace Explicit
{
+/**
+ * @brief Struct of Conv2D(explicit) operation's param
+ */
struct Param
{
- int32_t ofm_index;
+  int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
- int32_t ker_index;
- int32_t bias_index;
+  int32_t ifm_index; /**< Input feature map index */
+ int32_t ker_index; /**< Kernel index */
+ int32_t bias_index; /**< Bias index */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
+ int32_t padding_left_index; /**< Left padding index */
+ int32_t padding_right_index; /**< Right padding index */
+ int32_t padding_top_index; /**< Top padding index */
+  int32_t padding_bottom_index; /**< Bottom padding index */
- int32_t activation_index;
+ int32_t activation_index; /**< Activation index */
+ /**
+ * @brief Construct a new Param object for Conv2D(explicit) as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Conv2D(explicit) with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Conv2D(explicit)
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for conv2D(explicit) with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for conv2D(explicit)
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for conv2D(explicit)
+ * @return Parameters of conv2D(explicit)
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for conv2D(explicit)
+ * @param [in] v Node visitor for invoking visit function of conv2D(explicit)
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
@@ -80,39 +119,72 @@ private:
namespace Implicit
{
+/**
+ * @brief Struct of Conv2D(implicit) operation's param
+ */
struct Param
{
- int32_t ofm_index;
+  int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
- int32_t ker_index;
- int32_t bias_index;
+  int32_t ifm_index; /**< Input feature map index */
+ int32_t ker_index; /**< Kernel index */
+ int32_t bias_index; /**< Bias index */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
- int32_t padding_index;
- int32_t activation_index;
+ int32_t padding_index; /**< Padding index */
+ int32_t activation_index; /**< Activation index */
+ /**
+ * @brief Construct a new Param object for Conv2D(implicit) as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Conv2D(implicit) with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Conv2D(implicit)
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for conv2D(implicit) with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for conv2D(implicit)
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for conv2D(implicit)
+ * @return Parameters of conv2D(implicit)
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for conv2D(implicit)
+ * @param [in] v Node visitor for invoking visit function of conv2D(implicit)
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc
new file mode 100644
index 000000000..db164a148
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/DepthToSpace.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthToSpace
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace DepthToSpace
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthToSpace
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+}
+
+} // namespace DepthToSpace
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h
new file mode 100644
index 000000000..dd4c5c914
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_DEPTHTOSPACE_H__
+#define __INTERNAL_OP_DEPTHTOSPACE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthToSpace
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+ int32_t block_size_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace DepthToSpace
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_DEPTHTOSPACE_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
index 77ab4b63e..c63e30aae 100644
--- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file DepthwiseConv2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::DepthwiseConv2D Param structs
+ * and internal::tflite::op::DepthwiseConv2D Node classes
+ */
#ifndef __INTERNAL_OP_DEPTHWISE_CONV_2D_H__
#define __INTERNAL_OP_DEPTHWISE_CONV_2D_H__
@@ -32,44 +38,75 @@ namespace DepthwiseConv2D
namespace Explicit
{
+/**
+ * @brief Struct to have indexes for explicit padding DepthwiseConv2D operation parameter
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; /**< Index of output feature map */
- int32_t ifm_index;
- int32_t ker_index;
- int32_t bias_index;
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t ker_index; /**< Index of kernel */
+ int32_t bias_index; /**< Index of bias */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
-
- int32_t multipler_index;
- int32_t activation_index;
+ int32_t padding_left_index; /**< Index of padding left */
+ int32_t padding_right_index; /**< Index of padding right */
+ int32_t padding_top_index; /**< Index of padding top */
+ int32_t padding_bottom_index; /**< Index of padding bottom */
+  int32_t multipler_index; /**< Index of multiplier */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an explicit padding DepthwiseConv2D operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
@@ -78,43 +115,74 @@ private:
} // namespace Explicit
namespace Implicit
{
+/**
+ * @brief Struct to have indexes for implicit padding DepthwiseConv2D operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
- int32_t ker_index;
- int32_t bias_index;
+ int32_t ofm_index; /**< Index of output feature map */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t ker_index; /**< Index of kernel */
+ int32_t bias_index; /**< Index of bias */
- int32_t padding_index;
- int32_t multipler_index;
- int32_t activation_index;
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+ int32_t padding_index; /**< Index of padding */
+  int32_t multipler_index; /**< Index of multiplier */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an implicit padding DepthwiseConv2D operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Dequantize.h b/runtimes/pure_arm_compute/src/internal/op/Dequantize.h
index b0645d136..f19898e9e 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Dequantize.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Dequantize.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Dequantize.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Dequantize::Param struct
+ * and internal::tflite::op::Dequantize::Node class
+ */
#ifndef __INTERNAL_OP_DEQUANTIZE_H__
#define __INTERNAL_OP_DEQUANTIZE_H__
@@ -30,31 +36,62 @@ namespace op
namespace Dequantize
{
+/**
+ * @brief Struct to have indexes for Dequantize operation parameter
+ */
struct Param
{
- int32_t output_index;
-
- int32_t input_index;
+ int32_t output_index; /**< Index of output feature map */
+ int32_t input_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an Dequantize operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Div.h b/runtimes/pure_arm_compute/src/internal/op/Div.h
index 06ed7ec21..d5fc09d19 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Div.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Div.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Div.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Div::Param struct
+ * and internal::tflite::op::Div::Node class
+ */
#ifndef __INTERNAL_OP_DIV_H__
#define __INTERNAL_OP_DIV_H__
@@ -30,33 +36,64 @@ namespace op
namespace Div
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t lhs_index;
- int32_t rhs_index;
- int32_t activation_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t lhs_index; /**< Index of lhs */
+ int32_t rhs_index; /**< Index of rhs */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h
index 4547f27c7..17e8485f7 100644
--- a/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h
+++ b/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file EmbeddingLookup.h
+ * @brief This file contains accept function and params for EmbeddingLookup operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_EMBEDDING_LOOKUP_H__
#define __INTERNAL_OP_EMBEDDING_LOOKUP_H__
@@ -30,32 +36,65 @@ namespace op
namespace EmbeddingLookup
{
+/**
+ * @brief Struct of EmbeddingLookup operation's param
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; /**< Output index */
- int32_t lookups_index;
- int32_t values_index;
+ int32_t lookups_index; /**< Lookups index */
+ int32_t values_index; /**< Values index */
+ /**
+ * @brief Construct a new Param object for EmbeddingLookup as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for EmbeddingLookup with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for EmbeddingLookup
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for EmbeddingLookup with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for EmbeddingLookup
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for EmbeddingLookup
+ * @return Parameters of EmbeddingLookup
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for EmbeddingLookup
+ * @param [in] v Node visitor for invoking visit function of EmbeddingLookup
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Equal.cc b/runtimes/pure_arm_compute/src/internal/op/Equal.cc
new file mode 100644
index 000000000..b9cccc6a9
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Equal.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Equal.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/Equal.h b/runtimes/pure_arm_compute/src/internal/op/Equal.h
new file mode 100644
index 000000000..78b9f846f
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Equal.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_EQUAL_H__
+#define __INTERNAL_OP_EQUAL_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_EQUAL_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Exp.cc b/runtimes/pure_arm_compute/src/internal/op/Exp.cc
new file mode 100644
index 000000000..6f1aa8f42
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Exp.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Exp.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Exp
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Exp
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Exp
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace Exp
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/Exp.h b/runtimes/pure_arm_compute/src/internal/op/Exp.h
new file mode 100644
index 000000000..ac7f244b7
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Exp.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_EXP_H__
+#define __INTERNAL_OP_EXP_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Exp
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Exp
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_EXP_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Floor.h b/runtimes/pure_arm_compute/src/internal/op/Floor.h
index 8cf2a841c..5264ec10c 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Floor.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Floor.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Floor.h
+ * @brief This file contains accept function and params for Floor operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_FLOOR_H__
#define __INTERNAL_OP_FLOOR_H__
@@ -30,31 +36,64 @@ namespace op
namespace Floor
{
+/**
+ * @brief Struct of Floor operation's param
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; /**< Output index */
- int32_t input_index;
+ int32_t input_index; /**< Input index */
+ /**
+ * @brief Construct a new Param object for Floor as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Floor with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Floor
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for Floor with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for Floor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for Floor
+ * @return Parameters of Floor
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for Floor
+ * @param [in] v Node visitor for invoking visit function of Floor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h b/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h
index 7a425a6af..434308435 100644
--- a/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h
+++ b/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file FullyConnected.h
+ * @brief This file contains accept function and params for FullyConnected operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_FULLY_CONNTECTED_H__
#define __INTERNAL_OP_FULLY_CONNTECTED_H__
@@ -30,34 +36,70 @@ namespace op
namespace FullyConnected
{
+/**
+ * @brief Struct of FullyConnected operation's param
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; /**< Output index */
- int32_t input_index;
- int32_t weight_index;
- int32_t bias_index;
- int32_t activation_index;
+ int32_t input_index; /**< Input index */
+ int32_t weight_index; /**< Weight index */
+ int32_t bias_index; /**< Bias index */
+ int32_t activation_index; /**< Activation index */
+ /**
+ * @brief Construct a new Param object for FullyConnected as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for FullyConnected with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for FullyConnected
+ */
class Node final : public op::Node
{
public:
+  /**
+   * @brief Construct a new Node object for FullyConnected with param
+   * @param [in] param Parameters for Node
+   */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for FullyConnected
+ */
virtual ~Node() = default;
public:
+ /**
+   * @brief Get parameters for FullyConnected
+   * @return Parameters of FullyConnected
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for FullyConnected
+ * @param [in] v Node visitor for invoking visit function of FullyConnected
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Gather.h b/runtimes/pure_arm_compute/src/internal/op/Gather.h
index 5f7fe956f..4470236eb 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Gather.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Gather.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Gather.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Gather operation
+ */
+
#ifndef __INTERNAL_OP_GATHER_H__
#define __INTERNAL_OP_GATHER_H__
@@ -30,37 +36,69 @@ namespace op
namespace Gather
{
+/**
+ * @brief Struct to manipulate parameter for Gather operation
+ */
struct Param
{
- int32_t ofm_index; // output
+ int32_t ofm_index; //!< index for output feature map
- int32_t lhs_index; // input
- int32_t rhs_index; // indexes
- int32_t axis_index; // axis
+ int32_t lhs_index; //!< index for lhs tensor
+ int32_t rhs_index; //!< index for rhs tensor
+ int32_t axis_index; //!< index for axis
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+   * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define Gather Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+   * @brief Construct a new Gather Node object
+   * @param[in] param Parameter for Gather Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for Gather node
};
} // namespace Gather
diff --git a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc
index 30a853a64..7e04ecf82 100644
--- a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc
@@ -1,52 +1,68 @@
-#include "internal/op/HashtableLookup.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 3 && outputCount == 2);
-
- output_index = outputs[0];
- hits_index = outputs[1];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lookups Index
- // 1 -> Keys Index
- // 2 -> Values Index
- lookups_index = inputs[0];
- keys_index = inputs[1];
- values_index = inputs[2];
-}
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/HashtableLookup.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace HashtableLookup
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace HashtableLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace HashtableLookup
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 2);
+
+ output_index = outputs[0];
+ hits_index = outputs[1];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lookups Index
+ // 1 -> Keys Index
+ // 2 -> Values Index
+ lookups_index = inputs[0];
+ keys_index = inputs[1];
+ values_index = inputs[2];
+}
+
+} // namespace HashtableLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h
index 192da2aae..a5b43d1c7 100644
--- a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h
+++ b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h
@@ -1,56 +1,109 @@
-#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__
-#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-struct Param
-{
- int32_t output_index;
- int32_t hits_index;
-
- int32_t lookups_index;
- int32_t values_index;
- int32_t keys_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file HashtableLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::HashtableLookup::Param struct
+ * and internal::tflite::op::HashtableLookup::Node class
+ */
+#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace HashtableLookup
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output feature map */
+ int32_t hits_index; /**< Index of hits */
+
+ int32_t lookups_index; /**< Index of lookups */
+ int32_t values_index; /**< Index of values */
+ int32_t keys_index; /**< Index of keys */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace HashtableLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__
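
The same decoding pattern, sketched for the three-input, two-output HashtableLookup case (operand indexes 0..4 and the sketch() wrapper are hypothetical, for illustration only):

#include "internal/op/HashtableLookup.h"

void sketch()
{
  // lookups, keys, values on the input side; output, hits on the output side,
  // matching the assert in HashtableLookup.cc.
  const uint32_t inputs[3] = {0, 1, 2};
  const uint32_t outputs[2] = {3, 4};

  internal::tflite::op::HashtableLookup::Param param(3, inputs, 2, outputs);
  // param.lookups_index == 0, param.keys_index == 1, param.values_index == 2
  // param.output_index == 3, param.hits_index == 4
  (void)param;
}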
diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc
index 449540178..44a6ee63d 100644
--- a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "internal/op/L2Normalization.h"
#include "internal/op/NodeVisitor.h"
diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h
index 70fa2fb7e..2e94fac11 100644
--- a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h
+++ b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h
@@ -1,3 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file L2Normalization.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::L2Normalization::Param struct
+ * and internal::tflite::op::L2Normalization::Node class
+ */
#ifndef __INTERNAL_OP_L2_NORMALIZATION_H__
#define __INTERNAL_OP_L2_NORMALIZATION_H__
@@ -14,31 +36,62 @@ namespace op
namespace L2Normalization
{
+/**
+ * @brief Struct to have indexes for L2Normalization operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t ifm_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an L2Normalization operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc
index 73c1bb65c..64041ab49 100644
--- a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc
@@ -1,124 +1,124 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "internal/op/L2Pool2D.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 10 && outputCount == 1);
-
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
- ifm_index = inputs[0];
- padding_left_index = inputs[1];
- padding_right_index = inputs[2];
- padding_top_index = inputs[3];
- padding_bottom_index = inputs[4];
- hstride_index = inputs[5];
- vstride_index = inputs[6];
- kw_index = inputs[7];
- kh_index = inputs[8];
- activation_index = inputs[9];
-}
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 7 && outputCount == 1);
-
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
- ifm_index = inputs[0];
- padding_index = inputs[1];
- hstride_index = inputs[2];
- vstride_index = inputs[3];
- kw_index = inputs[4];
- kh_index = inputs[5];
- activation_index = inputs[6];
-}
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/L2Pool2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 10 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+  // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_left_index = inputs[1];
+ padding_right_index = inputs[2];
+ padding_top_index = inputs[3];
+ padding_bottom_index = inputs[4];
+ hstride_index = inputs[5];
+ vstride_index = inputs[6];
+ kw_index = inputs[7];
+ kh_index = inputs[8];
+ activation_index = inputs[9];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 7 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+  // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_index = inputs[1];
+ hstride_index = inputs[2];
+ vstride_index = inputs[3];
+ kw_index = inputs[4];
+ kh_index = inputs[5];
+ activation_index = inputs[6];
+}
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
index f4a25539b..facb223c7 100644
--- a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
@@ -1,130 +1,198 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INTERNAL_OP_L2_POOL_2D_H__
-#define __INTERNAL_OP_L2_POOL_2D_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-struct Param
-{
- int32_t ofm_index;
-
- int32_t ifm_index;
-
- int32_t kw_index;
- int32_t kh_index;
-
- int32_t hstride_index;
- int32_t vstride_index;
-
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
-
- int32_t activation_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-struct Param
-{
- int32_t ofm_index;
-
- int32_t ifm_index;
-
- int32_t kw_index;
- int32_t kh_index;
-
- int32_t hstride_index;
- int32_t vstride_index;
-
- int32_t padding_index;
- int32_t activation_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_L2_POOL_2D_H__
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file L2Pool2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::L2Pool2D Param structs
+ * and internal::tflite::op::L2Pool2D Node classes
+ */
+#ifndef __INTERNAL_OP_L2_POOL_2D_H__
+#define __INTERNAL_OP_L2_POOL_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_left_index; /**< Index of padding left */
+ int32_t padding_right_index; /**< Index of padding right */
+ int32_t padding_top_index; /**< Index of padding top */
+ int32_t padding_bottom_index; /**< Index of padding bottom */
+
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_index; /**< Index of padding */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_L2_POOL_2D_H__
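
The two variants are selected by operand count at model-build time; below is a sketch of that dispatch, assuming the builder mirrors the assertions in L2Pool2D.cc (the buildL2Pool function and its wiring are hypothetical; the real logic lives in compilation.cc):

#include "internal/op/L2Pool2D.h"

#include <cassert>

// Hypothetical dispatcher: 10 operands -> explicit padding, 7 -> padding code.
void buildL2Pool(uint32_t inputCount, const uint32_t *inputs,
                 uint32_t outputCount, const uint32_t *outputs)
{
  namespace pool = internal::tflite::op::L2Pool2D;

  if (inputCount == 10)
  {
    // padding_{left,right,top,bottom}_index are separate operands here
    pool::Explicit::Param param(inputCount, inputs, outputCount, outputs);
    (void)param;
  }
  else
  {
    assert(inputCount == 7);
    // padding_index holds ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID
    pool::Implicit::Param param(inputCount, inputs, outputCount, outputs);
    (void)param;
  }
}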
diff --git a/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
new file mode 100644
index 000000000..b7419d923
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LocalResponseNormalization.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LocalResponseNormalization
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LocalResponseNormalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LocalResponseNormalization
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 5 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
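+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Radius Index
+  // 2 -> Bias Index
+  // 3 -> Alpha Index
+  // 4 -> Beta Index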
+ ifm_index = inputs[0];
+ radius_index = inputs[1];
+ bias_index = inputs[2];
+ alpha_index = inputs[3];
+ beta_index = inputs[4];
+}
+
+} // namespace LocalResponseNormalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h
new file mode 100644
index 000000000..29e0699ad
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__
+#define __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LocalResponseNormalization
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+ int32_t radius_index;
+ int32_t bias_index;
+ int32_t alpha_index;
+ int32_t beta_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LocalResponseNormalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc
new file mode 100644
index 000000000..5b7da4d3b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LogicalAnd.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h
new file mode 100644
index 000000000..2f53f756d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOGICAL_AND_H__
+#define __INTERNAL_OP_LOGICAL_AND_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGICAL_AND_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc
new file mode 100644
index 000000000..4cb6a8e2a
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LogicalNot.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
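+  // Each input should be interpreted as follows:
+  //
+  // 0 -> Input Tensor Index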
+ input_index = inputs[0];
+}
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h
new file mode 100644
index 000000000..9593deafe
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOGICAL_NOT_H__
+#define __INTERNAL_OP_LOGICAL_NOT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGICAL_NOT_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc
new file mode 100644
index 000000000..8295f6f0b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LogicalOr.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h
new file mode 100644
index 000000000..6487fa720
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOGICAL_OR_H__
+#define __INTERNAL_OP_LOGICAL_OR_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGICAL_OR_H__
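
All of the ops in this change are consumed the same way: a compiler pass hands the node a visitor, and accept() bounces control back into the overload for the concrete type, as every .cc above shows:

  void Node::accept(NodeVisitor &&v) const { v.visit(*this); }

So a pass implements one visit() overload per op (the overload set is declared in internal/op/NodeVisitor.h, which is not part of this excerpt) and reads the decoded operand indexes through node.param().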
diff --git a/runtimes/pure_arm_compute/src/internal/op/Logistic.h b/runtimes/pure_arm_compute/src/internal/op/Logistic.h
index db8935846..a42fdc0d4 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Logistic.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Logistic.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Logistic.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Logistic::Param struct
+ * and internal::tflite::op::Logistic::Node class
+ */
#ifndef __INTERNAL_OP_LOGISTIC_H__
#define __INTERNAL_OP_LOGISTIC_H__
@@ -30,31 +36,61 @@ namespace op
namespace Logistic
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t ifm_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+   * @param[in] v Visitor
+   * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Lstm.h b/runtimes/pure_arm_compute/src/internal/op/Lstm.h
index 056ac2ea7..f51f0402a 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Lstm.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Lstm.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Lstm.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::LSTM::Param struct
+ * and internal::tflite::op::LSTM::Node class
+ */
#ifndef __INTERNAL_OP_LSTM_H__
#define __INTERNAL_OP_LSTM_H__
@@ -30,56 +36,87 @@ namespace op
namespace LSTM
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t scratch_buffer_index;
- int32_t output_state_out_index;
- int32_t cell_state_out_index;
- int32_t output_index;
+  int32_t scratch_buffer_index;   /**< Index of scratch buffer */
+ int32_t output_state_out_index; /**< Index of output state out */
+ int32_t cell_state_out_index; /**< Index of cell state out */
+ int32_t output_index; /**< Index of output */
- int32_t input_index;
- int32_t input_to_input_weights_index;
- int32_t input_to_forget_weights_index;
- int32_t input_to_cell_weights_index;
- int32_t input_to_output_weights_index;
- int32_t recurrent_to_input_weights_index;
- int32_t recurrent_to_forget_weights_index;
- int32_t recurrent_to_cell_weights_index;
- int32_t recurrent_to_output_weights_index;
- int32_t cell_to_input_weights_index;
- int32_t cell_to_forget_weights_index;
- int32_t cell_to_output_weights_index;
- int32_t input_gate_bias_index;
- int32_t forget_gate_bias_index;
- int32_t cell_bias_index;
- int32_t output_gate_bias_index;
- int32_t projection_weights_index;
- int32_t projection_bias_index;
- int32_t output_state_in_index;
- int32_t cell_state_in_index;
- int32_t activation_index;
- int32_t cell_threshold_index;
- int32_t projection_threshold_index;
+ int32_t input_index; /**< Index of input */
+ int32_t input_to_input_weights_index; /**< Index of input to input weights */
+ int32_t input_to_forget_weights_index; /**< Index of input to forget weights */
+ int32_t input_to_cell_weights_index; /**< Index of input to cell weights */
+ int32_t input_to_output_weights_index; /**< Index of input to output weights */
+ int32_t recurrent_to_input_weights_index; /**< Index of recurrent to input weights */
+ int32_t recurrent_to_forget_weights_index; /**< Index of recurrent to forget weights */
+ int32_t recurrent_to_cell_weights_index; /**< Index of recurrent to cell weights */
+ int32_t recurrent_to_output_weights_index; /**< Index of recurrent to output weights */
+ int32_t cell_to_input_weights_index; /**< Index of cell to input weights */
+ int32_t cell_to_forget_weights_index; /**< Index of cell to forget weights */
+ int32_t cell_to_output_weights_index; /**< Index of cell to output weights */
+ int32_t input_gate_bias_index; /**< Index of input gate bias */
+ int32_t forget_gate_bias_index; /**< Index of forget gate bias */
+ int32_t cell_bias_index; /**< Index of cell bias */
+ int32_t output_gate_bias_index; /**< Index of output gate bias */
+ int32_t projection_weights_index; /**< Index of projection weights */
+ int32_t projection_bias_index; /**< Index of projection bias */
+ int32_t output_state_in_index; /**< Index of output state in */
+ int32_t cell_state_in_index; /**< Index of cell state in */
+ int32_t activation_index; /**< Index of activation */
+ int32_t cell_threshold_index; /**< Index of cell threshold */
+ int32_t projection_threshold_index; /**< Index of projection threshold */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+   * @param[in] v Visitor
+   * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h b/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h
index d5da17d66..329ccecb7 100644
--- a/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file MaxPool2D.h
+ * @brief This file contains accept function and params for MaxPool2D operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_MAX_POOL_2D_H__
#define __INTERNAL_OP_MAX_POOL_2D_H__
@@ -32,44 +38,77 @@ namespace MaxPool2D
namespace Explicit
{
+/**
+ * @brief Struct of MaxPool2D(Explicit) operation's param
+ */
struct Param
{
- int32_t ofm_index;
+  int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
+  int32_t ifm_index; /**< Input feature map index */
- int32_t kw_index;
- int32_t kh_index;
+ int32_t kw_index; /**< Kernel width index */
+ int32_t kh_index; /**< Kernel height index */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
+ int32_t padding_left_index; /**< Left padding index */
+ int32_t padding_right_index; /**< Right padding index */
+ int32_t padding_top_index; /**< Top padding index */
+ int32_t padding_bottom_index; /**< Bottom padding index */
- int32_t activation_index;
+ int32_t activation_index; /**< Activation index */
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Explicit) as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Explicit) with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for MaxPool2D(Explicit)
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for MaxPool2D(Explicit) with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for MaxPool2D(Explicit)
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for MaxPool2D(Explicit)
+ * @return Parameters of MaxPool2D(Explicit)
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for MaxPool2D(Explicit)
+ * @param [in] v Node visitor for invoking visit function of MaxPool2D(Explicit)
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
@@ -81,40 +120,73 @@ private:
namespace Implicit
{
+/**
+ * @brief Struct of MaxPool2D(Implicit) operation's param
+ */
struct Param
{
- int32_t ofm_index;
+  int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
+  int32_t ifm_index; /**< Input feature map index */
- int32_t kw_index;
- int32_t kh_index;
+ int32_t kw_index; /**< Kernel width index */
+ int32_t kh_index; /**< Kernel height index */
- int32_t hstride_index;
- int32_t vstride_index;
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
- int32_t padding_index;
- int32_t activation_index;
+ int32_t padding_index; /**< Padding index */
+ int32_t activation_index; /**< Activation index */
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Implicit) as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Implicit) with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for MaxPool2D(Implicit)
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for MaxPool2D(Implicit) with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for MaxPool2D(Implicit)
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for MaxPool2D(Implicit)
+ * @return Parameters of MaxPool2D(Implicit)
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for MaxPool2D(Implicit)
+ * @param [in] v Node visitor for invoking visit function of MaxPool2D(Implicit)
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Mean.h b/runtimes/pure_arm_compute/src/internal/op/Mean.h
index 385b38dbf..f8e7ed308 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Mean.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Mean.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Mean.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Mean::Param struct
+ * and internal::tflite::op::Mean::Node class
+ */
#ifndef __INTERNAL_OP_MEAN_H__
#define __INTERNAL_OP_MEAN_H__
@@ -30,33 +36,64 @@ namespace op
namespace Mean
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index; // output
-
- int32_t ifm_index; // input
- int32_t axis_index; // axis
- int32_t keep_dims_index; // keep_dims
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t axis_index; /**< Index of axis */
+ int32_t keep_dims_index; /**< Index of keep dims */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Mul.h b/runtimes/pure_arm_compute/src/internal/op/Mul.h
index ebb72c4be..9710dd057 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Mul.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Mul.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file Mul.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Mul class
+ */
#ifndef __INTERNAL_OP_MUL_H__
#define __INTERNAL_OP_MUL_H__
@@ -30,33 +35,63 @@ namespace op
namespace Mul
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t lhs_index;
- int32_t rhs_index;
- int32_t activation_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t lhs_index; /**< Index of lhs */
+ int32_t rhs_index; /**< Index of rhs */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Neg.cc b/runtimes/pure_arm_compute/src/internal/op/Neg.cc
new file mode 100644
index 000000000..72fecf484
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Neg.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Neg.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Neg
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Neg
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Neg
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace Neg
+} // namespace op
+} // namespace tflite
+} // namespace internal
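
For reference, a minimal sketch (not part of this patch; the operand indices are hypothetical) of how a Param such as Neg::Param decodes the NNAPI-style operand index arrays passed to it:

#include <cassert>
#include <cstdint>

// Stand-in for internal::tflite::op::Neg::Param, mirroring the ctor above.
struct NegParam
{
  int32_t ofm_index;
  int32_t ifm_index;

  NegParam(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
           const uint32_t *outputs)
  {
    assert(inputCount == 1 && outputCount == 1);
    ofm_index = outputs[0]; // output tensor operand index
    ifm_index = inputs[0];  // input tensor operand index
  }
};

int main()
{
  const uint32_t inputs[] = {3};  // hypothetical input operand index
  const uint32_t outputs[] = {7}; // hypothetical output operand index
  NegParam param(1, inputs, 1, outputs);
  assert(param.ifm_index == 3 && param.ofm_index == 7);
  return 0;
}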
diff --git a/runtimes/pure_arm_compute/src/internal/op/Neg.h b/runtimes/pure_arm_compute/src/internal/op/Neg.h
new file mode 100644
index 000000000..77507df3d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Neg.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_NEG_H__
+#define __INTERNAL_OP_NEG_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Neg
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Neg
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_NEG_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Node.h b/runtimes/pure_arm_compute/src/internal/op/Node.h
index 3927c20f0..be1cbdb5b 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Node.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Node.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Node.h
+ * @brief This file contains struct of Node and NodeVisitor
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_NODE_H__
#define __INTERNAL_OP_NODE_H__
@@ -24,12 +30,26 @@ namespace tflite
namespace op
{
+/**
+ * @brief Struct of operation NodeVisitor
+ */
struct NodeVisitor;
+/**
+ * @brief Struct of operation Node
+ */
struct Node
{
+ /**
+ * @brief Destroy the Node object for operation
+ */
virtual ~Node() = default;
+ /**
+ * @brief Function for accepting node for operation
+ * @param [in] v Node visitor for invoking visit function of operation
+ * @return N/A
+ */
virtual void accept(NodeVisitor &&) const = 0;
};
diff --git a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
index 6d8d10af0..0c1a4001d 100644
--- a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
+++ b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file NodeVisitor.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines NodeVisitor
+ */
+
#ifndef __INTERNAL_OP_NODE_VISITOR_H__
#define __INTERNAL_OP_NODE_VISITOR_H__
@@ -26,6 +32,7 @@
#include "internal/op/Dequantize.h"
#include "internal/op/MaxPool2D.h"
#include "internal/op/AvgPool2D.h"
+#include "internal/op/ArgMax.h"
#include "internal/op/Concat.h"
#include "internal/op/Reshape.h"
#include "internal/op/ResizeBilinear.h"
@@ -33,9 +40,11 @@
#include "internal/op/FullyConnected.h"
#include "internal/op/Softmax.h"
#include "internal/op/ReduceMax.h"
+#include "internal/op/ReduceMin.h"
#include "internal/op/Cast.h"
#include "internal/op/TopKV2.h"
#include "internal/op/Gather.h"
+#include "internal/op/PReLU.h"
#include "internal/op/ReLU.h"
#include "internal/op/ReLU1.h"
#include "internal/op/ReLU6.h"
@@ -49,13 +58,30 @@
#include "internal/op/Floor.h"
#include "internal/op/Split.h"
#include "internal/op/RSQRT.h"
+#include "internal/op/SQRT.h"
#include "internal/op/Pad.h"
#include "internal/op/SpaceToDepth.h"
+#include "internal/op/SpaceToBatchND.h"
#include "internal/op/L2Pool2D.h"
#include "internal/op/EmbeddingLookup.h"
#include "internal/op/HashtableLookup.h"
#include "internal/op/L2Normalization.h"
#include "internal/op/SquaredDifference.h"
+#include "internal/op/LocalResponseNormalization.h"
+#include "internal/op/DepthToSpace.h"
+#include "internal/op/Unpack.h"
+#include "internal/op/Neg.h"
+#include "internal/op/Exp.h"
+#include "internal/op/ReduceSum.h"
+#include "internal/op/Equal.h"
+#include "internal/op/BatchToSpaceNd.h"
+#include "internal/op/TransposeConv.h"
+#include "internal/op/Pack.h"
+#include "internal/op/Abs.h"
+#include "internal/op/NotEqual.h"
+#include "internal/op/LogicalAnd.h"
+#include "internal/op/LogicalNot.h"
+#include "internal/op/LogicalOr.h"
namespace internal
{
@@ -64,54 +90,400 @@ namespace tflite
namespace op
{
+/**
+ * @brief Struct to define visitor for operation Nodes
+ */
struct NodeVisitor
{
+ /**
+ * @brief Destruct NodeVisitor object with default
+ */
virtual ~NodeVisitor() = default;
+ /**
+ * @brief Visit an Add node
+ * @param[in] node Add node to visit
+ * @return N/A
+ */
virtual void visit(const Add::Node &) = 0;
+ /**
+ * @brief Visit a Sub node
+ * @param[in] node Sub node to visit
+ * @return N/A
+ */
virtual void visit(const Sub::Node &) = 0;
+ /**
+ * @brief Visit a Mul node
+ * @param[in] node Mul node to visit
+ * @return N/A
+ */
virtual void visit(const Mul::Node &) = 0;
+ /**
+ * @brief Visit a Div node
+ * @param[in] node Div node to visit
+ * @return N/A
+ */
virtual void visit(const Div::Node &) = 0;
+ /**
+ * @brief Visit a Conv2D node with implicit padding
+ * @param[in] node Conv2D node to visit
+ * @return N/A
+ */
virtual void visit(const Conv2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit a Conv2D node with explicit padding
+ * @param[in] node Conv2D node to visit
+ * @return N/A
+ */
virtual void visit(const Conv2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit a DepthwiseConv2D node with implicit padding
+ * @param[in] node DepthwiseConv2D node to visit
+ * @return N/A
+ */
virtual void visit(const DepthwiseConv2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit a DepthwiseConv2D node with explicit padding
+ * @param[in] node DepthwiseConv2D node to visit
+ * @return N/A
+ */
virtual void visit(const DepthwiseConv2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit a Dequantize node
+ * @param[in] node Dequantize node to visit
+ * @return N/A
+ */
virtual void visit(const Dequantize::Node &) = 0;
+ /**
+ * @brief Visit a MaxPool2D node with implicit padding
+ * @param[in] node MaxPool2D node to visit
+ * @return N/A
+ */
virtual void visit(const MaxPool2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit a MaxPool2D node with explicit padding
+ * @param[in] node MaxPool2D node to visit
+ * @return N/A
+ */
virtual void visit(const MaxPool2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit an AvgPool2D node with implicit padding
+ * @param[in] node AvgPool2D node to visit
+ * @return N/A
+ */
virtual void visit(const AvgPool2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit an AvgPool2D node with explicit padding
+ * @param[in] node AvgPool2D node to visit
+ * @return N/A
+ */
virtual void visit(const AvgPool2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit a Concat node
+ * @param[in] node Concat node to visit
+ * @return N/A
+ */
virtual void visit(const Concat::Node &) = 0;
+ /**
+ * @brief Visit an ArgMax node
+ * @param[in] node ArgMax node to visit
+ * @return N/A
+ */
+ virtual void visit(const ArgMax::Node &) = 0;
+ /**
+ * @brief Visit a Reshape node
+ * @param[in] node Reshape node to visit
+ * @return N/A
+ */
virtual void visit(const Reshape::Node &) = 0;
+ /**
+ * @brief Visit a ResizeBilinear node
+ * @param[in] node ResizeBilinear node to visit
+ * @return N/A
+ */
virtual void visit(const ResizeBilinear::Node &) = 0;
+ /**
+ * @brief Visit a StridedSlice node
+ * @param[in] node StridedSlice node to visit
+ * @return N/A
+ */
virtual void visit(const StridedSlice::Node &) = 0;
+ /**
+ * @brief Visit a FullyConnected node
+ * @param[in] node FullyConnected node to visit
+ * @return N/A
+ */
virtual void visit(const FullyConnected::Node &) = 0;
+ /**
+ * @brief Visit a Softmax node
+ * @param[in] node Softmax node to visit
+ * @return N/A
+ */
virtual void visit(const Softmax::Node &) = 0;
+ /**
+ * @brief Visit a ReduceMax node
+ * @param[in] node ReduceMax node to visit
+ * @return N/A
+ */
virtual void visit(const ReduceMax::Node &) = 0;
+ /**
+ * @brief Visit a ReduceMin node
+ * @param[in] node ReduceMin node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReduceMin::Node &) = 0;
+ /**
+ * @brief Visit a Cast node
+ * @param[in] node Cast node to visit
+ * @return N/A
+ */
virtual void visit(const Cast::Node &) = 0;
+ /**
+ * @brief Visit a TopKV2 node
+ * @param[in] node TopKV2 node to visit
+ * @return N/A
+ */
virtual void visit(const TopKV2::Node &) = 0;
+ /**
+ * @brief Visit a Gather node
+ * @param[in] node Gather node to visit
+ * @return N/A
+ */
virtual void visit(const Gather::Node &) = 0;
+ /**
+ * @brief Visit a PReLU node
+ * @param[in] node PReLU node to visit
+ * @return N/A
+ */
+ virtual void visit(const PReLU::Node &) = 0;
+ /**
+ * @brief Visit a ReLU node
+ * @param[in] node ReLU node to visit
+ * @return N/A
+ */
virtual void visit(const ReLU::Node &) = 0;
+ /**
+ * @brief Visit a ReLU1 node
+ * @param[in] node ReLU1 node to visit
+ * @return N/A
+ */
virtual void visit(const ReLU1::Node &) = 0;
+ /**
+ * @brief Visit a ReLU6 node
+ * @param[in] node ReLU6 node to visit
+ * @return N/A
+ */
virtual void visit(const ReLU6::Node &) = 0;
+ /**
+ * @brief Visit a Tanh node
+ * @param[in] node Tanh node to visit
+ * @return N/A
+ */
virtual void visit(const Tanh::Node &) = 0;
+ /**
+ * @brief Visit a Squeeze node
+ * @param[in] node Squeeze node to visit
+ * @return N/A
+ */
virtual void visit(const Squeeze::Node &) = 0;
+ /**
+ * @brief Visit a Logistic node
+ * @param[in] node Logistic node to visit
+ * @return N/A
+ */
virtual void visit(const Logistic::Node &) = 0;
+ /**
+ * @brief Visit a Mean node
+ * @param[in] node Mean node to visit
+ * @return N/A
+ */
virtual void visit(const Mean::Node &) = 0;
+ /**
+ * @brief Visit an RNN node
+ * @param[in] node RNN node to visit
+ * @return N/A
+ */
virtual void visit(const RNN::Node &) = 0;
+ /**
+ * @brief Visit a Transpose node
+ * @param[in] node Transpose node to visit
+ * @return N/A
+ */
virtual void visit(const Transpose::Node &) = 0;
+ /**
+ * @brief Visit an LSTM node
+ * @param[in] node LSTM node to visit
+ * @return N/A
+ */
virtual void visit(const LSTM::Node &) = 0;
+ /**
+ * @brief Visit a Floor node
+ * @param[in] node Floor node to visit
+ * @return N/A
+ */
virtual void visit(const Floor::Node &) = 0;
+ /**
+ * @brief Visit a Split node
+ * @param[in] node Split node to visit
+ * @return N/A
+ */
virtual void visit(const Split::Node &) = 0;
+ /**
+ * @brief Visit an RSQRT node
+ * @param[in] node RSQRT node to visit
+ * @return N/A
+ */
virtual void visit(const RSQRT::Node &) = 0;
+ /**
+ * @brief Visit an SQRT node
+ * @param[in] node SQRT node to visit
+ * @return N/A
+ */
+ virtual void visit(const SQRT::Node &) = 0;
+ /**
+ * @brief Visit a Pad node
+ * @param[in] node Pad node to visit
+ * @return N/A
+ */
virtual void visit(const Pad::Node &) = 0;
+ /**
+ * @brief Visit a SpaceToDepth node
+ * @param[in] node SpaceToDepth node to visit
+ * @return N/A
+ */
virtual void visit(const SpaceToDepth::Node &) = 0;
+ /**
+ * @brief Visit a SpaceToBatchND node
+ * @param[in] node SpaceToBatchND node to visit
+ * @return N/A
+ */
+ virtual void visit(const SpaceToBatchND::Node &) = 0;
+ /**
+ * @brief Visit an L2Pool2D node with implicit padding
+ * @param[in] node L2Pool2D node to visit
+ * @return N/A
+ */
virtual void visit(const L2Pool2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit an L2Pool2D node with explicit padding
+ * @param[in] node L2Pool2D node to visit
+ * @return N/A
+ */
virtual void visit(const L2Pool2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit an EmbeddingLookup node
+ * @param[in] node EmbeddingLookup node to visit
+ * @return N/A
+ */
virtual void visit(const EmbeddingLookup::Node &) = 0;
+ /**
+ * @brief Visit a HashtableLookup node
+ * @param[in] node HashtableLookup node to visit
+ * @return N/A
+ */
virtual void visit(const HashtableLookup::Node &) = 0;
+ /**
+ * @brief Visit an L2Normalization node
+ * @param[in] node L2Normalization node to visit
+ * @return N/A
+ */
virtual void visit(const L2Normalization::Node &) = 0;
+ /**
+ * @brief Visit a SquaredDifference node
+ * @param[in] node SquaredDifference node to visit
+ * @return N/A
+ */
virtual void visit(const SquaredDifference::Node &) = 0;
+ /**
+ * @brief Visit a LocalResponseNormalization node
+ * @param[in] node LocalResponseNormalization node to visit
+ * @return N/A
+ */
+ virtual void visit(const LocalResponseNormalization::Node &) = 0;
+ /**
+ * @brief Visit a DepthToSpace node
+ * @param[in] node DepthToSpace node to visit
+ * @return N/A
+ */
+ virtual void visit(const DepthToSpace::Node &) = 0;
+ /**
+ * @brief Visit an Unpack node
+ * @param[in] node Unpack node to visit
+ * @return N/A
+ */
+ virtual void visit(const Unpack::Node &) = 0;
+ /**
+ * @brief Visit a Neg node
+ * @param[in] node Neg node to visit
+ * @return N/A
+ */
+ virtual void visit(const Neg::Node &) = 0;
+ /**
+ * @brief Visit an Exp node
+ * @param[in] node Exp node to visit
+ * @return N/A
+ */
+ virtual void visit(const Exp::Node &) = 0;
+ /**
+ * @brief Visit a ReduceSum node
+ * @param[in] node ReduceSum node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReduceSum::Node &) = 0;
+ /**
+ * @brief Visit an Equal node
+ * @param[in] node Equal node to visit
+ * @return N/A
+ */
+ virtual void visit(const Equal::Node &) = 0;
+ /**
+ * @brief Visit a BatchToSpaceNd node
+ * @param[in] node BatchToSpaceNd node to visit
+ * @return N/A
+ */
+ virtual void visit(const BatchToSpaceNd::Node &) = 0;
+ /**
+ * @brief Visit a TransposeConv node
+ * @param[in] node TransposeConv node to visit
+ * @return N/A
+ */
+ virtual void visit(const TransposeConv::Node &) = 0;
+ /**
+ * @brief Visit a Pack node
+ * @param[in] node Pack node to visit
+ * @return N/A
+ */
+ virtual void visit(const Pack::Node &) = 0;
+ /**
+ * @brief Visit an Abs node
+ * @param[in] node Abs node to visit
+ * @return N/A
+ */
+ virtual void visit(const Abs::Node &) = 0;
+ /**
+ * @brief Visit a NotEqual node
+ * @param[in] node NotEqual node to visit
+ * @return N/A
+ */
+ virtual void visit(const NotEqual::Node &) = 0;
+ /**
+ * @brief Visit a LogicalAnd node
+ * @param[in] node LogicalAnd node to visit
+ * @return N/A
+ */
+ virtual void visit(const LogicalAnd::Node &) = 0;
+ /**
+ * @brief Visit a LogicalNot node
+ * @param[in] node LogicalNot node to visit
+ * @return N/A
+ */
+ virtual void visit(const LogicalNot::Node &) = 0;
+ /**
+ * @brief Visit a LogicalOr node
+ * @param[in] node LogicalOr node to visit
+ * @return N/A
+ */
+ virtual void visit(const LogicalOr::Node &) = 0;
};
} // namespace op
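
The pure virtual visit() overloads above pair with each op's Node::accept(NodeVisitor &&v), which calls v.visit(*this) so the overload for the concrete node type is selected (double dispatch). A minimal sketch with illustrative names, not the runtime's real class set:

#include <iostream>

struct AddNode;
struct NegNode;

struct Visitor
{
  virtual ~Visitor() = default;
  virtual void visit(const AddNode &) = 0;
  virtual void visit(const NegNode &) = 0;
};

struct Node
{
  virtual ~Node() = default;
  virtual void accept(Visitor &&) const = 0;
};

// Each concrete node forwards to the visit() overload for its own type.
struct AddNode : public Node
{
  void accept(Visitor &&v) const override { v.visit(*this); }
};

struct NegNode : public Node
{
  void accept(Visitor &&v) const override { v.visit(*this); }
};

struct Printer : public Visitor
{
  void visit(const AddNode &) override { std::cout << "Add\n"; }
  void visit(const NegNode &) override { std::cout << "Neg\n"; }
};

int main()
{
  AddNode add;
  NegNode neg;
  add.accept(Printer{}); // prints "Add"
  neg.accept(Printer{}); // prints "Neg"
  return 0;
}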
diff --git a/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc b/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc
new file mode 100644
index 000000000..2906e214b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/NotEqual.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/NotEqual.h b/runtimes/pure_arm_compute/src/internal/op/NotEqual.h
new file mode 100644
index 000000000..0d6130948
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/NotEqual.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_NOT_EQUAL_H__
+#define __INTERNAL_OP_NOT_EQUAL_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_NOT_EQUAL_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/PReLU.cc b/runtimes/pure_arm_compute/src/internal/op/PReLU.cc
new file mode 100644
index 000000000..25b06505b
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/PReLU.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/PReLU.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace PReLU
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace PReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace PReLU
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> alpha Tensor Index
+ ifm_index = inputs[0];
+ alpha_index = inputs[1];
+}
+
+} // namespace PReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/PReLU.h b/runtimes/pure_arm_compute/src/internal/op/PReLU.h
new file mode 100644
index 000000000..ae754abb4
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/PReLU.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file PReLU.h
+ * @brief This file contains accept function and params for PReLU operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_PRELU_H__
+#define __INTERNAL_OP_PRELU_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace PReLU
+{
+
+/**
+ * @brief Struct of PReLU operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+ int32_t alpha_index; /**< Alpha input index */
+
+ /**
+ * @brief Construct a new Param object for PReLU as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for PReLU with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for PReLU
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for PReLU with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for PReLU
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for PReLU
+ * @return Parameters of PReLU
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for PReLU
+ * @param [in] v Node visitor for invoking visit function of PReLU
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace PReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_PRELU_H__
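
The header above only wires operand indices; the PReLU function itself computes out = x when x >= 0 and alpha * x otherwise, with alpha broadcast against the input. A scalar-alpha sketch for reference (the actual kernel is implemented elsewhere in the runtime):

#include <cassert>

// Elementwise PReLU with a scalar alpha (broadcasting omitted for brevity).
float prelu(float x, float alpha) { return x >= 0.0f ? x : alpha * x; }

int main()
{
  assert(prelu(3.0f, 0.25f) == 3.0f);   // positive inputs pass through
  assert(prelu(-4.0f, 0.25f) == -1.0f); // negative inputs are scaled by alpha
  return 0;
}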
diff --git a/runtimes/pure_arm_compute/src/internal/op/Pack.cc b/runtimes/pure_arm_compute/src/internal/op/Pack.cc
new file mode 100644
index 000000000..73f89b840
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Pack.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Pack.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pack
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Pack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pack
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(outputCount == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 .. n - 3 -> Input Tensor Index
+ // n - 2 -> Input Tensor count (will be ignored)
+ // n - 1 -> Input Axis Index
+ ofm_index = outputs[0];
+ axis_index = inputs[inputCount - 1];
+ // last input is axis along which packing is required
+ for (uint32_t n = 0; n < inputCount - 2; ++n)
+ {
+ ifm_indexes.emplace_back(inputs[n]);
+ }
+}
+
+} // namespace Pack
+} // namespace op
+} // namespace tflite
+} // namespace internal
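
A worked example (with hypothetical operand indices) of the layout the constructor above decodes for a three-tensor Pack:

#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
  // inputs 0..2: tensors to stack; input 3: tensor count (ignored);
  // input 4: axis operand.
  const uint32_t inputs[] = {10, 11, 12, 99, 4};
  const uint32_t inputCount = 5;

  std::vector<int32_t> ifm_indexes;
  for (uint32_t n = 0; n < inputCount - 2; ++n) // mirrors Param's loop
  {
    ifm_indexes.emplace_back(inputs[n]);
  }

  assert(ifm_indexes.size() == 3);     // {10, 11, 12}
  assert(inputs[inputCount - 1] == 4); // axis operand comes last
  return 0;
}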
diff --git a/runtimes/pure_arm_compute/src/internal/op/Pack.h b/runtimes/pure_arm_compute/src/internal/op/Pack.h
new file mode 100644
index 000000000..c5de01bd8
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Pack.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_PACK_H__
+#define __INTERNAL_OP_PACK_H__
+
+#include "internal/op/Node.h"
+#include <vector>
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pack
+{
+
+struct Param
+{
+ int32_t ofm_index;
+ // There are N + 2 inputs: inputs 0 to N - 1 are the tensors to stack
+ // (all of the same shape), input N is the tensor count (ignored), and
+ // input N + 1 is the axis index along which to stack.
+ std::vector<int32_t> ifm_indexes;
+ int32_t axis_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Pack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_PACK_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Pad.cc b/runtimes/pure_arm_compute/src/internal/op/Pad.cc
index 24d08bf36..00938242b 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Pad.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/Pad.cc
@@ -1,63 +1,63 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "internal/op/Pad.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 2 && outputCount == 1);
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- // 1 -> paddings
- ifm_index = inputs[0];
- paddings_index = inputs[1];
-}
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Pad.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pad
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Pad
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pad
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> paddings
+ ifm_index = inputs[0];
+ paddings_index = inputs[1];
+}
+} // namespace Pad
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/Pad.h b/runtimes/pure_arm_compute/src/internal/op/Pad.h
index e3ddae44c..68752a10e 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Pad.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Pad.h
@@ -1,69 +1,107 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INTERNAL_OP_PAD_H__
-#define __INTERNAL_OP_PAD_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-struct Param
-{
- int32_t ifm_index;
- int32_t paddings_index;
- int32_t ofm_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_PAD_H_
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Pad.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Pad node
+ */
+
+#ifndef __INTERNAL_OP_PAD_H__
+#define __INTERNAL_OP_PAD_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pad
+{
+
+/**
+ * @brief Struct to manipulate parameter for Pad operation
+ */
+struct Param
+{
+ int32_t ifm_index; //!< index for input
+ int32_t paddings_index; //!< index for padding
+ int32_t ofm_index; //!< index for output
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Pad Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Pad Node object
+ * @param[in] param Parameter for Pad Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Pad node
+};
+
+} // namespace Pad
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_PAD_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/RSQRT.h b/runtimes/pure_arm_compute/src/internal/op/RSQRT.h
index e384b27f2..e39d60241 100644
--- a/runtimes/pure_arm_compute/src/internal/op/RSQRT.h
+++ b/runtimes/pure_arm_compute/src/internal/op/RSQRT.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file RSQRT.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::RSQRT::Param struct
+ * and internal::tflite::op::RSQRT::Node class
+ */
#ifndef __INTERNAL_OP_RSQRT_H__
#define __INTERNAL_OP_RSQRT_H__
@@ -30,31 +36,61 @@ namespace op
namespace RSQRT
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t output_index;
-
- int32_t input_index;
+ int32_t output_index; /**< Index of output feature map */
+ int32_t input_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU.h b/runtimes/pure_arm_compute/src/internal/op/ReLU.h
index 64dcf2e14..aaa39b523 100644
--- a/runtimes/pure_arm_compute/src/internal/op/ReLU.h
+++ b/runtimes/pure_arm_compute/src/internal/op/ReLU.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file ReLU.h
+ * @brief This file contains accept function and params for ReLU operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_RELU_H__
#define __INTERNAL_OP_RELU_H__
@@ -30,31 +36,64 @@ namespace op
namespace ReLU
{
+/**
+ * @brief Struct of ReLU operation's param
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
+ int32_t ifm_index; /**< Input feature map index */
+ /**
+ * @brief Construct a new Param object for ReLU as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for ReLU with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for ReLU
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for ReLU with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for ReLU
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for ReLU
+ * @return Parameters of ReLU
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for ReLU
+ * @param [in] v Node visitor for invoking visit function of ReLU
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU1.h b/runtimes/pure_arm_compute/src/internal/op/ReLU1.h
index 997a9faff..330445af8 100644
--- a/runtimes/pure_arm_compute/src/internal/op/ReLU1.h
+++ b/runtimes/pure_arm_compute/src/internal/op/ReLU1.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file ReLU1.h
+ * @brief This file contains accept function and params for ReLU1 operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_RELU1_H__
#define __INTERNAL_OP_RELU1_H__
@@ -30,31 +36,64 @@ namespace op
namespace ReLU1
{
+/**
+ * @brief Struct of ReLU1 operation's param
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
+ int32_t ifm_index; /**< Input feature map index */
+ /**
+ * @brief Construct a new Param object for ReLU1 as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for ReLU1 with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for ReLU1
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for ReLU1 with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for ReLU1
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for ReLU1
+ * @return Parameters of ReLU1
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for ReLU1
+ * @param [in] v Node visitor for invoking visit function of ReLU1
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU6.h b/runtimes/pure_arm_compute/src/internal/op/ReLU6.h
index 77c55b64c..6fc2c24fe 100644
--- a/runtimes/pure_arm_compute/src/internal/op/ReLU6.h
+++ b/runtimes/pure_arm_compute/src/internal/op/ReLU6.h
@@ -14,6 +14,11 @@
* limitations under the License.
*/
+/**
+ * @file ReLU6.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ReLU6 class
+ */
#ifndef __INTERNAL_OP_RELU6_H__
#define __INTERNAL_OP_RELU6_H__
@@ -30,31 +35,61 @@ namespace op
namespace ReLU6
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t ifm_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h b/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h
index e3278aacf..77d8bd869 100644
--- a/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h
+++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file ReduceMax.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ReduceMax::Param struct
+ * and internal::tflite::op::ReduceMax::Node class
+ */
#ifndef __INTERNAL_OP_REDUCEMAX_H__
#define __INTERNAL_OP_REDUCEMAX_H__
@@ -30,32 +36,63 @@ namespace op
namespace ReduceMax
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
- int32_t axis_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t axis_index; /**< Index of axis */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent an operation of data structure
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc
new file mode 100644
index 000000000..72b6079d4
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReduceMin.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMin
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReduceMin
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMin
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
+
+} // namespace ReduceMin
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h
new file mode 100644
index 000000000..5dd82ec43
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ReduceMin.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ReduceMin::Param struct
+ * and internal::tflite::op::ReduceMin::Node class
+ */
+#ifndef __INTERNAL_OP_REDUCEMIN_H__
+#define __INTERNAL_OP_REDUCEMIN_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMin
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t axis_index; /**< Index of axis */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReduceMin
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_REDUCEMIN_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc
new file mode 100644
index 000000000..4d83c1734
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReduceSum.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceSum
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReduceSum
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceSum
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
+
+} // namespace ReduceSum
+} // namespace op
+} // namespace tflite
+} // namespace internal
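
ReduceMin and ReduceSum decode the identical (input tensor, axis tensor) operand layout; only the kernel applied later differs. For reference, a sketch of what a sum reduction along axis 1 of a 2x3 input produces:

#include <cassert>

int main()
{
  const float m[2][3] = {{1, 2, 3}, {4, 5, 6}};
  float sums[2] = {0.0f, 0.0f};

  // Reducing along axis 1 collapses the columns of each row.
  for (int r = 0; r < 2; ++r)
  {
    for (int c = 0; c < 3; ++c)
    {
      sums[r] += m[r][c];
    }
  }

  assert(sums[0] == 6.0f && sums[1] == 15.0f);
  return 0;
}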
diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h
new file mode 100644
index 000000000..9c661f63a
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_REDUCESUM_H__
+#define __INTERNAL_OP_REDUCESUM_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceSum
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+ int32_t axis_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReduceSum
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_REDUCESUM_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Reshape.h b/runtimes/pure_arm_compute/src/internal/op/Reshape.h
index ab77ade8c..7152eaece 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Reshape.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Reshape.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Reshape.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Reshape node
+ */
+
#ifndef __INTERNAL_OP_RESHAPE_H__
#define __INTERNAL_OP_RESHAPE_H__
@@ -30,36 +36,68 @@ namespace op
namespace Reshape
{
+/**
+ * @brief Struct to manipulate parameter for Reshape operation
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; //!< index for output feature map
- int32_t input_index;
- int32_t shape_index;
+ int32_t input_index; //!< index for input feature map
+ int32_t shape_index; //!< index for shape
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define Reshape Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Reshape Node object
+ * @param param Parameter for Reshape Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for Reshape node
};
} // namespace Reshape
diff --git a/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h b/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h
index bf216b75f..f2eab4aaf 100644
--- a/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h
+++ b/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file ResizeBilinear.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ResizeBilinear::Param struct
+ * and internal::tflite::op::ResizeBilinear::Node class
+ */
#ifndef __INTERNAL_OP_RESIZE_BILINEAR_H__
#define __INTERNAL_OP_RESIZE_BILINEAR_H__
@@ -30,33 +36,64 @@ namespace op
namespace ResizeBilinear
{
+/**
+ * @brief Struct to have indexes for ResizeBilinear operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t ifm_index;
- int32_t height_index;
- int32_t width_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t height_index; /**< Index of height */
+ int32_t width_index; /**< Index of width */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent a ResizeBilinear operation node
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Rnn.h b/runtimes/pure_arm_compute/src/internal/op/Rnn.h
index c436a0987..7b2a10843 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Rnn.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Rnn.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Rnn.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines RNN node
+ */
+
#ifndef __INTERNAL_OP_RNN_H__
#define __INTERNAL_OP_RNN_H__
@@ -30,41 +36,73 @@ namespace op
namespace RNN
{
+/**
+ * @brief Struct to manipulate parameter for RNN operation
+ */
struct Param
{
- int32_t output_index;
- int32_t hidden_state_out_index;
+ int32_t output_index; //!< index for output
+ int32_t hidden_state_out_index; //!< index for hidden state output
- int32_t input_index;
- int32_t weights_index;
- int32_t recurrent_weights_index;
- int32_t bias_index;
- int32_t hidden_state_in_index;
- int32_t fused_activation_index;
+ int32_t input_index; //!< index for input
+ int32_t weights_index; //!< index for weight
+ int32_t recurrent_weights_index; //!< index for recurrent weights
+ int32_t bias_index; //!< index for bias
+ int32_t hidden_state_in_index; //!< index for hidden state input
+ int32_t fused_activation_index; //!< index for fused activation
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define RNN Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new RNN Node object
+ * @param param Parameter for RNN Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for RNN node
};
} // namespace RNN
diff --git a/runtimes/pure_arm_compute/src/internal/op/SQRT.cc b/runtimes/pure_arm_compute/src/internal/op/SQRT.cc
new file mode 100644
index 000000000..70ce42e9c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/SQRT.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/SQRT.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SQRT
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace SQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SQRT
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ input_index = inputs[0];
+}
+
+} // namespace SQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/SQRT.h b/runtimes/pure_arm_compute/src/internal/op/SQRT.h
new file mode 100644
index 000000000..85dfb97a7
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/SQRT.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file SQRT.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::SQRT::Param struct
+ * and internal::tflite::op::SQRT::Node class
+ */
+#ifndef __INTERNAL_OP_SQRT_H__
+#define __INTERNAL_OP_SQRT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SQRT
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output feature map */
+
+ int32_t input_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an SQRT operation node
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace SQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SQRT_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Softmax.h b/runtimes/pure_arm_compute/src/internal/op/Softmax.h
index 746f6b4e6..6e631af5f 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Softmax.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Softmax.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Softmax.h
+ * @brief This file contains accept function and params for Softmax operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_SOFTMAX_H__
#define __INTERNAL_OP_SOFTMAX_H__
@@ -30,32 +36,65 @@ namespace op
namespace Softmax
{
+/**
+ * @brief Struct of Softmax operation's param
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; /**< Output index */
- int32_t input_index;
- int32_t scale_index;
+ int32_t input_index; /**< Input index */
+ int32_t scale_index; /**< Scale index */
+ /**
+ * @brief Construct a new Param object for Softmax as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Softmax with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Softmax
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for Softmax with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for Softmax
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for Softmax
+ * @return Parameters of Softmax
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for Softmax
+ * @param [in] v Node visitor for invoking visit function of Softmax
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc
new file mode 100644
index 000000000..9ab026cf4
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/SpaceToBatchND.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToBatchND
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace SpaceToBatchND
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToBatchND
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ // 2 -> Padding size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+ padding_size_index = inputs[2];
+}
+
+} // namespace SpaceToBatchND
+} // namespace op
+} // namespace tflite
+} // namespace internal
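The same decode pattern applies here, now with three inputs (index values hypothetical):

#include "internal/op/SpaceToBatchND.h"

const uint32_t inputs[3] = {0 /* input tensor */, 1 /* block size */, 2 /* padding size */};
const uint32_t outputs[1] = {3};

internal::tflite::op::SpaceToBatchND::Param param{3, inputs, 1, outputs};
// param.input_index == 0, param.block_size_index == 1,
// param.padding_size_index == 2, param.output_index == 3.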
diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h
new file mode 100644
index 000000000..650d068f4
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_SPACETOBATCHND_H__
+#define __INTERNAL_OP_SPACETOBATCHND_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToBatchND
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+ int32_t block_size_index;
+ int32_t padding_size_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace SpaceToBatchND
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SPACETOBATCHND_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h
index 81bfe5246..2e624006a 100644
--- a/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h
+++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file SpaceToDepth.h
+ * @brief This file contains accept function and params for SpaceToDepth operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_SPACETODEPTH_H__
#define __INTERNAL_OP_SPACETODEPTH_H__
@@ -30,32 +36,65 @@ namespace op
namespace SpaceToDepth
{
+/**
+ * @brief Struct of SpaceToDepth operation's param
+ */
struct Param
{
- int32_t output_index;
+ int32_t output_index; /**< Output index */
- int32_t input_index;
- int32_t block_size_index;
+ int32_t input_index; /**< Input index */
+ int32_t block_size_index; /**< Block size index */
+ /**
+ * @brief Construct a new Param object for SpaceToDepth as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for SpaceToDepth with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for SpaceToDepth
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for SpaceToDepth with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for SpaceToDepth
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for SpaceToDepth
+ * @return Parameters of SpaceToDepth
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for SpaceToDepth
+ * @param [in] v Node visitor for invoking visit function of SpaceToDepth
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Split.h b/runtimes/pure_arm_compute/src/internal/op/Split.h
index 8bea1000d..b2c6c2fd1 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Split.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Split.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Split.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Split node
+ */
+
#ifndef __INTERNAL_OP_SPLIT_H__
#define __INTERNAL_OP_SPLIT_H__
@@ -31,36 +37,68 @@ namespace op
namespace Split
{
+/**
+ * @brief Struct to manipulate parameter for Split operation
+ */
struct Param
{
- int32_t axis_index;
- int32_t ifm_index;
+ int32_t axis_index; //!< index for axis
+ int32_t ifm_index; //!< index for input feature map
- std::vector<int32_t> ofm_indexes;
+ std::vector<int32_t> ofm_indexes; //!< indexes for output feature maps
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define Split Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Split Node object
+ * @param param Parameter for Split Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for Split node
};
} // namespace Split
diff --git a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc
index c2c4f7242..f6c8bc5df 100644
--- a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc
+++ b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "internal/op/SquaredDifference.h"
#include "internal/op/NodeVisitor.h"
@@ -30,7 +46,7 @@ namespace SquaredDifference
Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
const uint32_t *outputs)
{
- assert(inputCount == 3 && outputCount == 1);
+ assert(inputCount == 2 && outputCount == 1);
ofm_index = outputs[0];
@@ -38,10 +54,8 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
//
// 0 -> LHS Tensor Index
// 1 -> RHS Tensor Index
- // 2 -> Activation Index
lhs_index = inputs[0];
rhs_index = inputs[1];
- activation_index = inputs[2];
}
} // namespace SquaredDifference
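With this fix SquaredDifference takes exactly two inputs; the fused-activation operand is gone. A hypothetical decode under the new contract:

#include "internal/op/SquaredDifference.h"

const uint32_t inputs[2] = {0 /* lhs */, 1 /* rhs */};
const uint32_t outputs[1] = {2};

internal::tflite::op::SquaredDifference::Param param{2, inputs, 1, outputs};
// param.lhs_index == 0, param.rhs_index == 1, param.ofm_index == 2.
// The old three-input form (with an activation index) now trips the assert.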
diff --git a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h
index 7760405b9..ecbb03209 100644
--- a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h
+++ b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h
@@ -1,3 +1,25 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file SquaredDifference.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::SquaredDifference::Param struct
+ * and internal::tflite::op::SquaredDifference::Node class
+ */
#ifndef __INTERNAL_OP_SQUAREDDIFFERENCE_H__
#define __INTERNAL_OP_SQUAREDDIFFERENCE_H__
@@ -14,33 +36,62 @@ namespace op
namespace SquaredDifference
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t ofm_index;
-
- int32_t lhs_index;
- int32_t rhs_index;
- int32_t activation_index;
+ int32_t ofm_index; /**< Index of output feature map */
+ int32_t lhs_index; /**< Index of lhs */
+ int32_t rhs_index; /**< Index of rhs */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent a SquaredDifference operation node
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Squeeze.h b/runtimes/pure_arm_compute/src/internal/op/Squeeze.h
index e871067f5..d5f36f85f 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Squeeze.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Squeeze.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Squeeze.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Squeeze::Param struct
+ * and internal::tflite::op::Squeeze::Node class
+ */
#ifndef __INTERNAL_OP_SQUEEZE_H__
#define __INTERNAL_OP_SQUEEZE_H__
@@ -30,32 +36,64 @@ namespace op
namespace Squeeze
{
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
struct Param
{
- int32_t output_index;
-
- int32_t input_index;
- int32_t dims_index_optional = -1; // optional param. default is -1
+ int32_t output_index; /**< Index of output feature map */
+ int32_t input_index; /**< Index of input feature map */
+ int32_t dims_index_optional = -1; /**< Index of dims (optional; defaults to -1) */
+ /**
+ * @brief Construct as default
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to represent a Squeeze operation node
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destruct as default
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h b/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h
index 26bb81a8c..21dbb9e68 100644
--- a/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h
+++ b/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file StridedSlice.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines StridedSlice node
+ */
+
#ifndef __INTERNAL_OP_STRIDEDSLICE_H__
#define __INTERNAL_OP_STRIDEDSLICE_H__
@@ -30,41 +36,73 @@ namespace op
namespace StridedSlice
{
+/**
+ * @brief Struct to manipulate parameter for StridedSlice operation
+ */
struct Param
{
- int32_t outputData_index;
+ int32_t outputData_index; //!< index for output data
- int32_t inputData_index;
- int32_t startData_index;
- int32_t endData_index;
- int32_t stridesData_index;
- int32_t beginMask_index;
- int32_t endMask_index;
- int32_t shrinkAxisMask_index;
+ int32_t inputData_index; //!< index for input data
+ int32_t startData_index; //!< index for slicing start positions
+ int32_t endData_index; //!< index for slicing end positions
+ int32_t stridesData_index; //!< index for stride values
+ int32_t beginMask_index; //!< index for begin mask
+ int32_t endMask_index; //!< index for end mask
+ int32_t shrinkAxisMask_index; //!< index for shrink-axis mask
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define StridedSlice Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new StridedSlice Node object
+ * @param param Parameter for StridedSlice Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for StridedSlice node
};
} // namespace StridedSlice
diff --git a/runtimes/pure_arm_compute/src/internal/op/Sub.h b/runtimes/pure_arm_compute/src/internal/op/Sub.h
index 3da271029..864359d1e 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Sub.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Sub.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Sub.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines SUB Node
+ */
+
#ifndef __INTERNAL_OP_SUB_H__
#define __INTERNAL_OP_SUB_H__
@@ -30,37 +36,69 @@ namespace op
namespace Sub
{
+/**
+ * @brief Struct to manipulate parameters for SUB
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; //!< index for output feature map
- int32_t lhs_index;
- int32_t rhs_index;
- int32_t activation_index;
+ int32_t lhs_index; //!< index for left-hand side
+ int32_t rhs_index; //!< index for right-hand side
+ int32_t activation_index; //!< index for activation function
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define SUB Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Sub Node object
+ * @param param Parameter for Sub Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for SUB node
};
} // namespace Sub
diff --git a/runtimes/pure_arm_compute/src/internal/op/Tanh.h b/runtimes/pure_arm_compute/src/internal/op/Tanh.h
index f5a9f102e..fd87297f1 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Tanh.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Tanh.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Tanh.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines TANH node
+ */
+
#ifndef __INTERNAL_OP_TANH_H__
#define __INTERNAL_OP_TANH_H__
@@ -30,35 +36,67 @@ namespace op
namespace Tanh
{
+/**
+ * @brief Struct to manipulate parameter for hyperbolic tangent operation
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; //!< index for output feature map
- int32_t ifm_index;
+ int32_t ifm_index; //!< index for input feature map
+ /**
+ * @brief Default Constructor
+ */
Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define Tanh Operation
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Tanh Node object
+ * @param param Parameter for Tanh Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Default Destructor
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
- const Param _param;
+ const Param _param; //!< parameter for Tanh node
};
} // namespace Tanh
diff --git a/runtimes/pure_arm_compute/src/internal/op/TopKV2.h b/runtimes/pure_arm_compute/src/internal/op/TopKV2.h
index 79bbd1f2e..02b7827e9 100644
--- a/runtimes/pure_arm_compute/src/internal/op/TopKV2.h
+++ b/runtimes/pure_arm_compute/src/internal/op/TopKV2.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file TopKV2.h
+ * @brief This file contains accept function and params for TopKV2 operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_TOPKV2_H__
#define __INTERNAL_OP_TOPKV2_H__
@@ -30,33 +36,66 @@ namespace op
namespace TopKV2
{
+/**
+ * @brief Struct of TopKV2 operation's param
+ */
struct Param
{
- int32_t outputValues_index;
- int32_t outputIndices_index;
+ int32_t outputValues_index; /**< Output values index */
+ int32_t outputIndices_index; /**< Output indices index */
- int32_t inputData_index;
- int32_t k_index;
+ int32_t inputData_index; /**< Input data index */
+ int32_t k_index; /**< K value index */
+ /**
+ * @brief Construct a new Param object for TopKV2 as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for TopKV2 with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for TopKV2
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for TopKV2 with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for TopKV2
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for TopKV2
+ * @return Parameters of TopKV2
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for TopKV2
+ * @param [in] v Node visitor for invoking visit function of TopKV2
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/Transpose.h b/runtimes/pure_arm_compute/src/internal/op/Transpose.h
index dac2ef8f2..bb01bf322 100644
--- a/runtimes/pure_arm_compute/src/internal/op/Transpose.h
+++ b/runtimes/pure_arm_compute/src/internal/op/Transpose.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file Transpose.h
+ * @brief This file contains accept function and params for Transpose operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __INTERNAL_OP_TRANSPOSE_H__
#define __INTERNAL_OP_TRANSPOSE_H__
@@ -30,32 +36,65 @@ namespace op
namespace Transpose
{
+/**
+ * @brief Struct of Transpose operation's param
+ */
struct Param
{
- int32_t ofm_index;
+ int32_t ofm_index; /**< Output feature map index */
- int32_t ifm_index;
- int32_t permu_index;
+ int32_t ifm_index; /**< Input feature map index */
+ int32_t permu_index; /**< Permutation index */
+ /**
+ * @brief Construct a new Param object for Transpose as default
+ */
Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Transpose with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
};
+/**
+ * @brief Class to define operation node for Transpose
+ */
class Node final : public op::Node
{
public:
+ /**
+ * @brief Construct a new Node object for Transpose with param
+ * @param [in] param Parameters for Node
+ */
Node(const Param &param) : _param(param)
{
// DO NOTHING
}
public:
+ /**
+ * @brief Destroy the Node object for Transpose
+ */
virtual ~Node() = default;
public:
+ /**
+ * @brief Get parameters for Transpose
+ * @return Parameters of Transpose
+ */
const Param &param(void) const { return _param; }
public:
+ /**
+ * @brief Function for accepting node for Transpose
+ * @param [in] v Node visitor for invoking visit function of Transpose
+ * @return N/A
+ */
void accept(NodeVisitor &&) const override;
private:
diff --git a/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc
new file mode 100644
index 000000000..502eff525
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/TransposeConv.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TransposeConv
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace TransposeConv
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TransposeConv
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 6 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Output Shape Index
+ // 1 -> Weights Index
+ // 2 -> Input Tensor Index
+ // 3 -> Padding Type
+ // 4 -> Stride width
+ // 5 -> Stride height
+
+ op_shape_index = inputs[0];
+ ker_index = inputs[1];
+ ifm_index = inputs[2];
+ padding_index = inputs[3];
+ hstride_index = inputs[4];
+ vstride_index = inputs[5];
+}
+
+} // namespace TransposeConv
+} // namespace op
+} // namespace tflite
+} // namespace internal
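Note the unusual input ordering: the output-shape operand comes first, before the kernel and the input tensor. A hypothetical decode:

#include "internal/op/TransposeConv.h"

const uint32_t inputs[6] = {
    0, // output shape
    1, // weights (kernel)
    2, // input tensor
    3, // padding type
    4, // stride width
    5, // stride height
};
const uint32_t outputs[1] = {6};

internal::tflite::op::TransposeConv::Param param{6, inputs, 1, outputs};
// param.op_shape_index == 0, param.ker_index == 1, param.ifm_index == 2,
// param.padding_index == 3, param.hstride_index == 4, param.vstride_index == 5.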
diff --git a/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h
new file mode 100644
index 000000000..b0122f82d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_TRANSPOSECONV_H__
+#define __INTERNAL_OP_TRANSPOSECONV_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TransposeConv
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t op_shape_index;
+ int32_t ker_index;
+ int32_t ifm_index;
+ int32_t padding_index;
+ int32_t hstride_index;
+ int32_t vstride_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace TransposeConv
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_TRANSPOSECONV_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/Unpack.cc b/runtimes/pure_arm_compute/src/internal/op/Unpack.cc
new file mode 100644
index 000000000..a1be0280c
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Unpack.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Unpack.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Unpack
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Unpack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Unpack
+{
+// There are three inputs: the tensor to be unpacked,
+// the number of splits, and the axis along which
+// the tensor is to be unpacked.
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3);
+
+ ifm_index = inputs[0];
+
+ for (uint32_t n = 0; n < outputCount; ++n)
+ {
+ ofm_indexes.emplace_back(outputs[n]);
+ }
+ num_split_index = inputs[1];
+ axis_index = inputs[2];
+}
+
+} // namespace Unpack
+} // namespace op
+} // namespace tflite
+} // namespace internal
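Unlike most ops in this runtime, Unpack produces a variable number of outputs, which is why the constructor loops over outputCount instead of asserting on it. A hypothetical decode:

#include "internal/op/Unpack.h"

const uint32_t inputs[3] = {0 /* tensor */, 1 /* number of splits */, 2 /* axis */};
const uint32_t outputs[2] = {3, 4}; // one operand per unpacked slice

internal::tflite::op::Unpack::Param param{3, inputs, 2, outputs};
// param.ifm_index == 0, param.num_split_index == 1, param.axis_index == 2,
// param.ofm_indexes == {3, 4}.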
diff --git a/runtimes/pure_arm_compute/src/internal/op/Unpack.h b/runtimes/pure_arm_compute/src/internal/op/Unpack.h
new file mode 100644
index 000000000..575e3d024
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/Unpack.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_UNPACK_H__
+#define __INTERNAL_OP_UNPACK_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Unpack
+{
+
+struct Param
+{
+ int32_t ifm_index;
+ int32_t axis_index;
+ int32_t num_split_index;
+ // There are N outputs after Unpacking Input Tensor along axis
+ std::vector<int32_t> ofm_indexes;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Unpack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_UNPACK_H__
diff --git a/runtimes/pure_arm_compute/src/logging.h b/runtimes/pure_arm_compute/src/logging.h
index 61b434eda..914b63057 100644
--- a/runtimes/pure_arm_compute/src/logging.h
+++ b/runtimes/pure_arm_compute/src/logging.h
@@ -14,6 +14,12 @@
* limitations under the License.
*/
+/**
+ * @file logging.h
+ * @brief This file contains Context class for logging.
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __PURE_ARM_COMPUTE_LOGGING_H__
#define __PURE_ARM_COMPUTE_LOGGING_H__
@@ -22,9 +28,15 @@
namespace logging
{
+/**
+ * @brief class to define Context for logging
+ */
class Context
{
public:
+ /**
+ * @brief Construct default
+ */
Context() : _enabled{false}
{
auto env = std::getenv("PURE_ARM_COMPUTE_LOG_ENABLE");
@@ -36,12 +48,21 @@ public:
}
public:
+ /**
+ * @brief Check whether the PURE_ARM_COMPUTE_LOG_ENABLE environment variable is set
+ * @return @c true if PURE_ARM_COMPUTE_LOG_ENABLE is set in the environment, otherwise @c false
+ */
bool enabled(void) const { return _enabled; }
private:
bool _enabled;
};
+/**
+ * @brief Static Context instance for logging
+ */
static Context ctx;
} // namespace logging
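A minimal usage sketch for this Context (only the members shown above are assumed; any logging macros the header may also define are outside this hunk):

// Run with logging enabled:
//   PURE_ARM_COMPUTE_LOG_ENABLE=1 ./nnapi_app
#include <iostream>
#include "logging.h"

void trace_compile_start(void)
{
  if (logging::ctx.enabled())
  {
    std::cout << "compile: start" << std::endl;
  }
}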
diff --git a/runtimes/pure_arm_compute/src/memory.cc b/runtimes/pure_arm_compute/src/memory.cc
index 2a9294f86..9e999661a 100644
--- a/runtimes/pure_arm_compute/src/memory.cc
+++ b/runtimes/pure_arm_compute/src/memory.cc
@@ -18,7 +18,7 @@
#include <sys/mman.h>
#include <memory>
-#include "nnfw/std/memory.h"
+#include "cpp14/memory.h"
#include "memory.h"
int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
@@ -31,7 +31,7 @@ int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t
// Use unique pointer to avoid memory leak
std::unique_ptr<ANeuralNetworksMemory> memory_ptr =
- nnfw::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset);
+ nnfw::cpp14::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset);
if (memory_ptr == nullptr)
{
return ANEURALNETWORKS_OUT_OF_MEMORY;
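The include switch above points at a C++14 make_unique backport. The header itself is not part of this diff; a typical shape for such a backport (an assumption, not a copy of cpp14/memory.h) is:

#include <memory>
#include <utility>

namespace nnfw
{
namespace cpp14
{

// Single-object form only, which is all memory.cc needs here.
template <typename T, typename... Args> std::unique_ptr<T> make_unique(Args &&... args)
{
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

} // namespace cpp14
} // namespace nnfw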
diff --git a/runtimes/pure_arm_compute/src/memory.h b/runtimes/pure_arm_compute/src/memory.h
index 8bd43b0d6..ffac26ef6 100644
--- a/runtimes/pure_arm_compute/src/memory.h
+++ b/runtimes/pure_arm_compute/src/memory.h
@@ -14,20 +14,51 @@
* limitations under the License.
*/
+/**
+ * @file memory.h
+ * @brief This file defines the ANeuralNetworksMemory struct for handling the Memory NNAPI
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __MEMORY_H__
#define __MEMORY_H__
#include <cstdint>
+/**
+ * @brief struct to define Memory NNAPI
+ */
struct ANeuralNetworksMemory
{
public:
+ /**
+ * @brief Constructor with params
+ * @param [in] size The requested size in bytes
+ * @param [in] protect The desired memory protection for the mapping
+ * @param [in] fd The requested file descriptor
+ * @param [in] offset The offset to the beginning of the file of the area to map
+ */
ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset);
+ /**
+ * @brief Default destructor
+ */
~ANeuralNetworksMemory();
public:
+ /**
+ * @brief Get size
+ * @return size
+ */
size_t size(void) const { return _size; }
+ /**
+ * @brief Get base pointer
+ * @return base pointer
+ */
uint8_t *base(void) { return _base; }
+ /**
+ * @brief Get base pointer
+ * @return const base pointer
+ */
const uint8_t *base(void) const { return _base; }
private:
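A hypothetical direct use of the wrapper (applications normally reach it through ANeuralNetworksMemory_createFromFd instead of constructing it themselves; the file name and size below are assumptions):

#include <fcntl.h>    // open, O_RDONLY
#include <sys/mman.h> // PROT_READ
#include "memory.h"   // ANeuralNetworksMemory

void example(void)
{
  int fd = open("weights.bin", O_RDONLY); // hypothetical model file
  ANeuralNetworksMemory mem{4096, PROT_READ, fd, 0};

  const uint8_t *bytes = mem.base(); // base address of the mapping
  size_t n = mem.size();             // == 4096
  (void)bytes;
  (void)n;
}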
diff --git a/runtimes/pure_arm_compute/src/model.cc b/runtimes/pure_arm_compute/src/model.cc
index 49ea59f17..2c4120d7a 100644
--- a/runtimes/pure_arm_compute/src/model.cc
+++ b/runtimes/pure_arm_compute/src/model.cc
@@ -602,6 +602,28 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
break;
}
+ case ANEURALNETWORKS_SPACE_TO_BATCH_ND:
+ {
+ using internal::tflite::op::SpaceToBatchND::Param;
+ using internal::tflite::op::SpaceToBatchND::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_BATCH_TO_SPACE_ND:
+ {
+ using internal::tflite::op::BatchToSpaceNd::Param;
+ using internal::tflite::op::BatchToSpaceNd::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
case ANEURALNETWORKS_L2_POOL_2D:
{
// Input count is 7 for Implicit Padding
@@ -675,6 +697,29 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
break;
}
+ case ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION:
+ {
+
+ using internal::tflite::op::LocalResponseNormalization::Param;
+ using internal::tflite::op::LocalResponseNormalization::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_DEPTH_TO_SPACE:
+ {
+ using internal::tflite::op::DepthToSpace::Param;
+ using internal::tflite::op::DepthToSpace::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
default:
throw std::runtime_error{"Not supported operation"};
};
@@ -706,6 +751,18 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
break;
}
+ case ANEURALNETWORKS_REDUCE_MIN_EX:
+ {
+ using internal::tflite::op::ReduceMin::Param;
+ using internal::tflite::op::ReduceMin::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
case ANEURALNETWORKS_TENSORFLOW_MAX_EX:
{
using internal::tflite::op::ReduceMax::Param;
@@ -718,6 +775,53 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
break;
}
+ case ANEURALNETWORKS_PRELU_EX:
+ {
+ using internal::tflite::op::PReLU::Param;
+ using internal::tflite::op::PReLU::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_TRANSPOSE_CONV_EX:
+ {
+ using internal::tflite::op::TransposeConv::Param;
+ using internal::tflite::op::TransposeConv::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOGICAL_AND_EX:
+ {
+ using internal::tflite::op::LogicalAnd::Param;
+ using internal::tflite::op::LogicalAnd::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOGICAL_OR_EX:
+ {
+ using internal::tflite::op::LogicalOr::Param;
+ using internal::tflite::op::LogicalOr::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
case ANEURALNETWORKS_RSQRT_EX:
{
using internal::tflite::op::RSQRT::Param;
@@ -730,6 +834,30 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
break;
}
+ case ANEURALNETWORKS_SQRT_EX:
+ {
+ using internal::tflite::op::SQRT::Param;
+ using internal::tflite::op::SQRT::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_EQUAL_EX:
+ {
+ using internal::tflite::op::Equal::Param;
+ using internal::tflite::op::Equal::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
case ANEURALNETWORKS_SQUARED_DIFFERENCE_EX:
{
using internal::tflite::op::SquaredDifference::Param;
@@ -778,6 +906,101 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
break;
}
+ case ANEURALNETWORKS_UNPACK_EX:
+ {
+ using internal::tflite::op::Unpack::Param;
+ using internal::tflite::op::Unpack::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_NEG_EX:
+ {
+ using internal::tflite::op::Neg::Param;
+ using internal::tflite::op::Neg::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_EXP_EX:
+ {
+ using internal::tflite::op::Exp::Param;
+ using internal::tflite::op::Exp::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_REDUCE_SUM_EX:
+ {
+ using internal::tflite::op::ReduceSum::Param;
+ using internal::tflite::op::ReduceSum::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_PACK_EX:
+ {
+ using internal::tflite::op::Pack::Param;
+ using internal::tflite::op::Pack::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_ABS_EX:
+ {
+ using internal::tflite::op::Abs::Param;
+ using internal::tflite::op::Abs::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_ARGMAX_EX:
+ {
+ using internal::tflite::op::ArgMax::Param;
+ using internal::tflite::op::ArgMax::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_NOT_EQUAL_EX:
+ {
+ using internal::tflite::op::NotEqual::Param;
+ using internal::tflite::op::NotEqual::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+
default:
throw std::runtime_error{"Not supported operation"};
}
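Every case above follows the same registration pattern, so wiring in a further operation is mechanical. A sketch of the shape of one case (SomeNewOp and its enum value are placeholders, not real symbols in this runtime):

case ANEURALNETWORKS_SOME_NEW_OP_EX: // hypothetical
{
  using internal::tflite::op::SomeNewOp::Param; // hypothetical op namespace
  using internal::tflite::op::SomeNewOp::Node;

  auto &operations = model->deref().operations();

  // Param's constructor validates operand counts and records the indexes;
  // emplace_back<Node> stores the node for later compilation.
  operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});

  break;
}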
diff --git a/runtimes/pure_arm_compute/src/model.h b/runtimes/pure_arm_compute/src/model.h
index a7e606201..8acc894f4 100644
--- a/runtimes/pure_arm_compute/src/model.h
+++ b/runtimes/pure_arm_compute/src/model.h
@@ -14,22 +14,52 @@
* limitations under the License.
*/
+/**
+ * @file model.h
+ * @brief This file contains the ANeuralNetworksModel class for handling Model NNAPI functions such as
+ * ANeuralNetworksModel_create, ANeuralNetworksModel_addOperand
+ * @ingroup COM_AI_RUNTIME
+ */
+
#ifndef __MODEL_H__
#define __MODEL_H__
#include "internal/Model.h"
+/**
+ * @brief struct to express Model of NNAPI
+ */
struct ANeuralNetworksModel
{
public:
+ /**
+ * @brief Construct without params
+ */
ANeuralNetworksModel();
public:
+ /**
+ * @brief Get reference of internal::tflite::Model
+ * @return Reference of internal::tflite::Model
+ */
internal::tflite::Model &deref(void) { return *_model; }
public:
+ /**
+ * @brief Release the internal::tflite::Model pointer into the given parameter
+ * @param [out] model Shared pointer that receives the released internal::tflite::Model
+ * @return N/A
+ */
void release(std::shared_ptr<const internal::tflite::Model> &model) { model = _model; }
+ /**
+ * @brief Get @c true if ANeuralNetworksModel_finish has been called, otherwise @c false
+ * @return @c true if ANeuralNetworksModel_finish has been called, otherwise @c false
+ */
bool isFinished() { return _isFinished == true; }
+ /**
+ * @brief Mark model process finished
+ * @return N/A
+ */
void markAsFinished() { _isFinished = true; }
private: