Diffstat (limited to 'runtime/contrib')
-rw-r--r-- runtime/contrib/CMakeLists.txt | 1
-rw-r--r-- runtime/contrib/README.md | 10
-rw-r--r-- runtime/contrib/TFLiteSharp/README.md | 92
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/CMakeLists.txt | 67
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h | 69
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h | 55
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp | 142
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteNative/tflite-native.pc.in | 13
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp.sln | 25
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.Libraries.cs | 23
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.TFLite.cs | 37
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/TFLiteSharp.csproj | 52
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Datatype.cs | 31
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Interpreter.cs | 263
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest.sln | 31
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/Program.cs | 38
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/TFLiteSharpTest.csproj | 12
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp.csproj | 54
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_App.cs | 65
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_Main.cs | 20
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mobilenet_v1_1.0_224.tflite | bin 0 -> 16900960 bytes
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse1.bmp | bin 0 -> 2764854 bytes
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse_224.bmp | bin 0 -> 150582 bytes
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/shared/res/TFLiteTestApp.png | bin 0 -> 10097 bytes
-rw-r--r-- runtime/contrib/TFLiteSharp/TFLiteTestApp/tizen-manifest.xml | 14
-rw-r--r-- runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.manifest | 5
-rw-r--r-- runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.spec | 103
-rw-r--r-- runtime/contrib/TFLiteSharp/packaging/tflite-native.manifest | 5
-rw-r--r-- runtime/contrib/android_benchmark_app/AndroidManifest.xml | 21
-rw-r--r-- runtime/contrib/android_benchmark_app/CMakeLists.txt | 97
-rw-r--r-- runtime/contrib/android_benchmark_app/README.md | 58
-rw-r--r-- runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp | 228
-rw-r--r-- runtime/contrib/android_benchmark_app/cpp/ndk_main.h | 92
-rw-r--r-- runtime/contrib/android_benchmark_app/java/com/ndk/tflbench/MainActivity.java | 110
-rw-r--r-- runtime/contrib/android_benchmark_app/res/drawable-hdpi/ic_launcher.png | bin 0 -> 9397 bytes
-rw-r--r-- runtime/contrib/android_benchmark_app/res/drawable-mdpi/ic_launcher.png | bin 0 -> 5237 bytes
-rw-r--r-- runtime/contrib/android_benchmark_app/res/drawable-xhdpi/ic_launcher.png | bin 0 -> 14383 bytes
-rw-r--r-- runtime/contrib/android_benchmark_app/res/drawable-xxhdpi/ic_launcher.png | bin 0 -> 19388 bytes
-rw-r--r-- runtime/contrib/android_benchmark_app/res/layout/activity_main.xml | 38
-rw-r--r-- runtime/contrib/android_benchmark_app/res/values-v21/styles.xml | 5
-rw-r--r-- runtime/contrib/android_benchmark_app/res/values/strings.xml | 6
-rw-r--r-- runtime/contrib/android_tflite/CMakeLists.txt | 31
-rw-r--r-- runtime/contrib/android_tflite/builtin_ops_jni.cc | 29
-rw-r--r-- runtime/contrib/benchmark_acl/.FORMATDENY | 0
-rw-r--r-- runtime/contrib/benchmark_acl/CMakeLists.txt | 24
-rw-r--r-- runtime/contrib/benchmark_acl/src/Benchmark.cpp | 74
-rw-r--r-- runtime/contrib/benchmark_acl/src/Benchmark.h | 82
-rw-r--r-- runtime/contrib/benchmark_acl/src/benchmark_googlenet.cpp | 242
-rw-r--r-- runtime/contrib/benchmark_acl/src/benchmark_inception_v3.cpp | 891
-rw-r--r-- runtime/contrib/benchmark_acl/src/benchmark_mobilenet.cpp | 265
-rw-r--r-- runtime/contrib/custom_op/README.md | 25
-rw-r--r-- runtime/contrib/custom_op/customOp-workflow.png | bin 0 -> 22082 bytes
-rw-r--r-- runtime/contrib/detection/CMakeLists.txt | 11
-rw-r--r-- runtime/contrib/detection/detection.cpp | 74
-rw-r--r-- runtime/contrib/heap_trace/CMakeLists.txt | 18
-rw-r--r-- runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc | 44
-rw-r--r-- runtime/contrib/heap_trace/src/cl_release_mem_object.cc | 43
-rw-r--r-- runtime/contrib/heap_trace/src/free_stub.cc | 35
-rw-r--r-- runtime/contrib/heap_trace/src/function_resolver.h | 29
-rw-r--r-- runtime/contrib/heap_trace/src/malloc_stub.cc | 39
-rw-r--r-- runtime/contrib/heap_trace/src/realloc_stub.cc | 40
-rw-r--r-- runtime/contrib/heap_trace/src/symbol_searcher.cc | 82
-rw-r--r-- runtime/contrib/heap_trace/src/symbol_searcher.h | 22
-rw-r--r-- runtime/contrib/heap_trace/src/trace.cc | 103
-rw-r--r-- runtime/contrib/heap_trace/src/trace.h | 75
-rw-r--r-- runtime/contrib/heap_trace/src/valloc_stub.cc | 39
-rw-r--r-- runtime/contrib/heap_trace/tests/CMakeLists.txt | 43
-rw-r--r-- runtime/contrib/heap_trace/tests/src/cl_create_buffer_interception_test.cc | 89
-rw-r--r-- runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc | 91
-rw-r--r-- runtime/contrib/heap_trace/tests/src/common_test_environment.cc | 51
-rw-r--r-- runtime/contrib/heap_trace/tests/src/common_test_environment.h | 38
-rw-r--r-- runtime/contrib/heap_trace/tests/src/file_content_manipulations.cc | 25
-rw-r--r-- runtime/contrib/heap_trace/tests/src/file_content_manipulations.h | 24
-rw-r--r-- runtime/contrib/heap_trace/tests/src/free_interception_test.cc | 60
-rw-r--r-- runtime/contrib/heap_trace/tests/src/main.cc | 23
-rw-r--r-- runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc | 87
-rw-r--r-- runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc | 110
-rw-r--r-- runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc | 79
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample1.h | 25
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample1/test_sample1.cc | 27
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample2.h | 26
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample2/test_sample2.cc | 24
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample3.h | 25
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample3/test_sample3.cc | 20
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample4.h | 25
-rw-r--r-- runtime/contrib/heap_trace/tests/src/test_sample4/test_sample4.cc | 21
-rw-r--r-- runtime/contrib/heap_trace/tests/src/trace_test.cc | 175
-rw-r--r-- runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc | 73
-rw-r--r-- runtime/contrib/labs/CMakeLists.txt | 5
-rw-r--r-- runtime/contrib/labs/jniacl/CMakeLists.txt | 18
-rw-r--r-- runtime/contrib/labs/jniacl/src/io_accessor.cc | 96
-rw-r--r-- runtime/contrib/labs/jniacl/src/io_accessor.h | 93
-rw-r--r-- runtime/contrib/labs/jniacl/src/jniacl_main.cc | 37
-rw-r--r-- runtime/contrib/labs/opencl_test/CMakeLists.txt | 11
-rw-r--r-- runtime/contrib/labs/opencl_test/README.md | 8
-rw-r--r-- runtime/contrib/labs/opencl_test/src/opencl_test.cc | 386
-rw-r--r-- runtime/contrib/labs/tflite_examples/CMakeLists.txt | 2
-rw-r--r-- runtime/contrib/labs/tflite_examples/src/conv.cpp | 330
-rw-r--r-- runtime/contrib/logging/CMakeLists.txt | 12
-rw-r--r-- runtime/contrib/logging/include/operand.def | 12
-rw-r--r-- runtime/contrib/logging/include/operation.def | 15
-rw-r--r-- runtime/contrib/logging/src/nnapi_logging.cc | 399
-rw-r--r-- runtime/contrib/mlapse/CMakeLists.txt | 8
-rw-r--r-- runtime/contrib/mlapse/README.md | 3
-rw-r--r-- runtime/contrib/mlapse/tfl/CMakeLists.txt | 12
-rw-r--r-- runtime/contrib/mlapse/tfl/driver.cc | 280
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.cc | 67
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.h | 50
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.cc | 24
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h | 77
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.cc | 124
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h | 63
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/multicast_observer.cc | 17
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/multicast_observer.h | 75
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/tfl/load.cc | 55
-rw-r--r-- runtime/contrib/mlapse/tfl/mlapse/tfl/load.h | 40
-rw-r--r-- runtime/contrib/pure_arm_compute/CMakeLists.txt | 29
-rw-r--r-- runtime/contrib/pure_arm_compute/src/compilation.cc | 6434
-rw-r--r-- runtime/contrib/pure_arm_compute/src/compilation.h | 75
-rw-r--r-- runtime/contrib/pure_arm_compute/src/event.cc | 31
-rw-r--r-- runtime/contrib/pure_arm_compute/src/event.h | 33
-rw-r--r-- runtime/contrib/pure_arm_compute/src/execution.cc | 628
-rw-r--r-- runtime/contrib/pure_arm_compute/src/execution.h | 119
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/FeatureSink.h | 80
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/FeatureSource.h | 77
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/IExecutionBuilder.h | 49
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h | 91
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h | 82
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Model.cc | 128
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Model.h | 538
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Sink.h | 45
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Sinks.h | 97
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Source.h | 46
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Swizzle.h | 115
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h | 89
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h | 89
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/TensorSource.h | 83
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/VectorSink.h | 73
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/VectorSource.h | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc | 87
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute.h | 337
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.cc | 152
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h | 156
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute/feature/View.h | 156
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute/kernel/View.h | 110
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute/matrix/View.h | 104
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/arm_compute/tensor/View.h | 112
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Reader.h | 105
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Utils.h | 82
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/View.h | 132
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/kernel/Reader.h | 94
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/matrix/Reader.h | 90
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h | 111
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/Reader.h | 116
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/View.h | 121
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Abs.cc | 59
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Abs.h | 68
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Add.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Add.h | 110
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.cc | 64
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.h | 70
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.cc | 124
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.h | 198
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.h | 83
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Cast.cc | 62
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Cast.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Concat.cc | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Concat.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.cc | 126
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.h | 200
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.h | 70
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc | 128
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.h | 198
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.cc | 62
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.h | 106
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Div.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Div.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Equal.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Equal.h | 83
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Exp.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Exp.h | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Floor.cc | 62
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Floor.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.cc | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.h | 114
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Gather.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Gather.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.cc | 68
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.cc | 60
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.h | 106
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.cc | 124
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.h | 198
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc | 64
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.h | 73
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.h | 83
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.cc | 60
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.h | 82
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.h | 83
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Logistic.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Logistic.h | 105
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Lstm.cc | 85
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Lstm.h | 131
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.cc | 124
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.h | 202
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Mean.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Mean.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Mul.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Mul.h | 106
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Neg.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Neg.h | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Node.h | 60
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/NodeVisitor.h | 493
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.h | 83
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/PReLU.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/PReLU.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Pack.cc | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Pack.h | 72
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Pad.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Pad.h | 107
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.cc | 62
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.h | 105
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReLU.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReLU.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.h | 104
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.h | 107
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.h | 107
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.h | 70
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Reshape.cc | 66
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Reshape.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Rnn.cc | 66
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Rnn.h | 113
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SQRT.cc | 62
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SQRT.h | 105
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Softmax.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Softmax.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.h | 71
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Split.cc | 72
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Split.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.cc | 64
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.h | 106
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.cc | 66
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.h | 108
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.cc | 88
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.h | 113
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Sub.cc | 67
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Sub.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Tanh.cc | 63
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Tanh.h | 107
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.cc | 70
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.h | 110
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Transpose.cc | 65
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Transpose.h | 109
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.cc | 74
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.h | 74
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Unpack.cc | 68
-rw-r--r-- runtime/contrib/pure_arm_compute/src/internal/op/Unpack.h | 72
-rw-r--r-- runtime/contrib/pure_arm_compute/src/library_info.cc | 17
-rw-r--r-- runtime/contrib/pure_arm_compute/src/logging.h | 74
-rw-r--r-- runtime/contrib/pure_arm_compute/src/memory.cc | 55
-rw-r--r-- runtime/contrib/pure_arm_compute/src/memory.h | 69
-rw-r--r-- runtime/contrib/pure_arm_compute/src/model.cc | 1082
-rw-r--r-- runtime/contrib/pure_arm_compute/src/model.h | 70
-rw-r--r-- runtime/contrib/pure_arm_compute/symbolcheck.cpp | 64
-rw-r--r-- runtime/contrib/tflite_classify/CMakeLists.txt | 22
-rw-r--r-- runtime/contrib/tflite_classify/src/ImageClassifier.cc | 107
-rw-r--r-- runtime/contrib/tflite_classify/src/ImageClassifier.h | 99
-rw-r--r-- runtime/contrib/tflite_classify/src/InferenceInterface.cc | 114
-rw-r--r-- runtime/contrib/tflite_classify/src/InferenceInterface.h | 93
-rw-r--r-- runtime/contrib/tflite_classify/src/tflite_classify.cc | 132
-rw-r--r-- runtime/contrib/tflite_test/CMakeLists.txt | 16
-rw-r--r-- runtime/contrib/tflite_test/tflite_test.cpp | 239
-rw-r--r-- runtime/contrib/uben/CMakeLists.txt | 29
-rw-r--r-- runtime/contrib/uben/Convolution.cpp | 429
-rw-r--r-- runtime/contrib/uben/Softmax.cpp | 54
293 files changed, 32642 insertions, 0 deletions
diff --git a/runtime/contrib/CMakeLists.txt b/runtime/contrib/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/runtime/contrib/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/runtime/contrib/README.md b/runtime/contrib/README.md
new file mode 100644
index 000000000..2f8b709eb
--- /dev/null
+++ b/runtime/contrib/README.md
@@ -0,0 +1,10 @@
+# nnfw contrib
+
+The `contrib` directory is a contribution channel where contributors can create a project and
+start code development. Projects in the `contrib` directory need not be directly related to
+`nnfw`, but each should have a purpose of its own that could augment the nnfw project.
+
+If you are interested in proposing a new project, please create a pull request (PR) with a new
+project directory under `contrib`, including a description of the proposed project. The PR will
+be reviewed by `nnfw` reviewers, and acceptance of the new project will be determined based on
+the PR reviews.
diff --git a/runtime/contrib/TFLiteSharp/README.md b/runtime/contrib/TFLiteSharp/README.md
new file mode 100644
index 000000000..8e43be618
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/README.md
@@ -0,0 +1,92 @@
+# C-Sharp TFLite API Directory structure
+```
+.
+├── packaging
+│   ├── TFLiteSharp.manifest
+│   └── TFLiteSharp.spec
+├── README.md
+├── TFLiteNative
+│   ├── CMakeLists.txt
+│   ├── include
+│   │   ├── tflite_log.h
+│   │   └── tflite_nativewrapper.h
+│   ├── src
+│   │   └── tflite_nativewrapper.cpp
+│   └── tflite-native.pc.in
+├── TFLiteSharp
+│   ├── TFLiteSharp
+│   │   ├── src
+│   │   │   └── Interpreter.cs
+│   │   └── TFLiteSharp.csproj
+│   └── TFLiteSharp.sln
+└── TFLiteSharpTest
+ ├── TFLiteSharpTest
+ │   ├── Program.cs
+ │   └── TFLiteSharpTest.csproj
+ └── TFLiteSharpTest.sln
+```
+
+# Build C-Sharp TFLite
+The TFLiteSharp package is built with gbs, the same tool that builds nnfw. Since a typical nnfw build does not need TFLiteSharp, its build process is kept separate. To build TFLiteSharp, run the command below:
+```
+nnfw$ gbs build --packaging-dir=contrib/TFLiteSharp/packaging/ --spec=TFLiteSharp.spec -A armv7l
+```
+This first builds the TFLiteNative package, which contains the native C++ bindings between the C# API and the TFLite API, and then builds TFLiteSharp (the C# API package).
+
+Please use the gbs.conf file corresponding to your Tizen image version. In most cases, it should be the same one used to build nnfw.
+
+# C-Sharp TFLite API list
+
+## Interpreter Class
+
+### Constructor
+
+The `Interpreter` class (defined in `Interpreter.cs`) drives model inference with TensorFlow Lite.
+
+#### Initializing an `Interpreter` With a Model File
+
+The `Interpreter` can be initialized with a model file using the constructor:
+
+```c#
+public Interpreter(string modelFile);
+```
+
+The number of threads available to the interpreter can be set using the following method:
+```c#
+public void SetNumThreads(int numThreads)
+```
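+
+For example, a minimal configuration sketch (the model path below is illustrative):
+
+```c#
+var interpreter = new TFLite.Interpreter("/path/to/model.tflite");
+interpreter.SetNumThreads(2);
+```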
+
+### Running a model
+
+If a model takes only one input and returns only one output, the following will trigger an inference run:
+
+```c#
+interpreter.Run(input, ref output);
+```
+
+For models with multiple inputs or outputs, use:
+
+```c#
+interpreter.RunForMultipleInputsOutputs(inputs, ref map_of_indices_to_outputs);
+```
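+
+For instance, a minimal sketch of the multi-input/output call (assuming `input0` is an array of a supported element type; note that only the single-input path is currently wired up natively):
+
+```c#
+Array[] inputs = { input0 };
+var outputs = new Dictionary<int, Array>();
+interpreter.RunForMultipleInputsOutputs(inputs, ref outputs);
+Array firstOutput = outputs[0]; // outputs are keyed by output index
+```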
+
+The C# API also provides functions for getting the model's input and output indices given tensor names:
+
+```c#
+public int GetInputIndex(string tensorName)
+public int GetOutputIndex(string tensorName)
+```
+
+Developers can also enable or disable the use of NNAPI based on hardware capabilities:
+```c#
+public void SetUseNNAPI(bool useNNAPI)
+```
+
+### Releasing Resources After Use
+
+An `Interpreter` owns native resources. To avoid memory leaks, these resources must be
+released after use by calling:
+
+```c#
+interpreter.Dispose();
+```
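+
+### Putting it together
+
+A minimal end-to-end sketch for a single-input/single-output float model (the model path and buffer sizes below are illustrative):
+
+```c#
+// Build the interpreter from a .tflite file and configure threading.
+var interpreter = new TFLite.Interpreter("/path/to/mobilenet_v1_1.0_224.tflite");
+interpreter.SetNumThreads(2);
+
+// Buffers sized for the model's input and output tensors.
+Array input = new float[1 * 224 * 224 * 3];
+Array output = new float[1001];
+
+// Run inference; the output buffer is populated by the call.
+interpreter.Run(input, ref output);
+
+// Release native resources when done.
+interpreter.Dispose();
+```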
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/CMakeLists.txt b/runtime/contrib/TFLiteSharp/TFLiteNative/CMakeLists.txt
new file mode 100644
index 000000000..8b58aac9c
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/CMakeLists.txt
@@ -0,0 +1,67 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
+SET(fw_name "tflite-native")
+
+PROJECT(${fw_name})
+SET(PREFIX ${CMAKE_INSTALL_PREFIX})
+SET(LIB ${LIB_PATH})
+SET(LIBDIR ${PREFIX}/${LIB_PATH})
+
+SET(INC_DIR include)
+INCLUDE_DIRECTORIES(${INC_DIR})
+
+INCLUDE(FindPkgConfig)
+
+SET(COMMON_DEPS "tensorflow-lite")
+SET(PC_DEPS "capi-base-common")
+
+IF (TIZEN)
+ MESSAGE("Building for Tizen")
+ SET(TIZEN_DEPS "dlog")
+ PKG_CHECK_MODULES(${fw_name} REQUIRED ${COMMON_DEPS} ${TIZEN_DEPS})
+ ADD_DEFINITIONS("-D__TIZEN__")
+ELSE ()
+ MESSAGE("Building for Linux")
+ PKG_CHECK_MODULES(${fw_name} REQUIRED ${COMMON_DEPS})
+ENDIF ()
+
+FOREACH(flag ${${fw_name}_CFLAGS})
+ SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}")
+ENDFOREACH(flag)
+
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXXFLAGS} -fPIC -Wall -Werror")
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS} -fPIC -Wall")
+SET(CMAKE_C_FLAGS_DEBUG "-O0 -g")
+
+ADD_DEFINITIONS("-DPREFIX=\"${CMAKE_INSTALL_PREFIX}\"")
+
+SET(CMAKE_EXE_LINKER_FLAGS "-Wl,--as-needed -Wl,--rpath=${LIBDIR}")
+
+aux_source_directory(src SOURCES)
+ADD_LIBRARY(${fw_name} SHARED ${SOURCES})
+
+TARGET_LINK_LIBRARIES(${fw_name} ${${fw_name}_LDFLAGS})
+
+SET_TARGET_PROPERTIES(${fw_name}
+ PROPERTIES
+ VERSION ${FULLVER}
+ SOVERSION ${MAJORVER}
+ CLEAN_DIRECT_OUTPUT 1
+)
+
+INSTALL(TARGETS ${fw_name} DESTINATION ${LIB})
+INSTALL(
+ DIRECTORY ${INC_DIR}/ DESTINATION include/
+ FILES_MATCHING
+ PATTERN "${INC_DIR}/*.h"
+ )
+
+SET(PC_NAME ${fw_name})
+SET(PC_REQUIRED ${PC_DEPS})
+SET(PC_LDFLAGS -l${fw_name})
+
+CONFIGURE_FILE(
+ ${fw_name}.pc.in
+ ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}.pc
+ @ONLY
+)
+INSTALL(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${fw_name}.pc DESTINATION ${LIB}/pkgconfig)
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
new file mode 100644
index 000000000..405ca9879
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_log.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TFLITE_LOG_H_
+#define _TFLITE_LOG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /*__cplusplus*/
+
+#define ERROR 1
+#define WARNING 2
+#define INFO 3
+#define DEBUG 4
+
+#ifdef __TIZEN__
+#include <dlog/dlog.h>
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif // LOG_TAG
+#define LOG_TAG "TFLITE_NATIVE"
+
+#define TFLITE_NATIVE_LOG(log_level, format, args...) \
+  do                                                  \
+  {                                                   \
+    switch (log_level)                                \
+    {                                                 \
+      case ERROR:                                     \
+        LOGE(format, ##args);                         \
+        break;                                        \
+      case WARNING:                                   \
+        LOGW(format, ##args);                         \
+        break;                                        \
+      default:                                        \
+        LOGI(format, ##args);                         \
+        break;                                        \
+    }                                                 \
+  } while (0)
+#else // __TIZEN__
+#define LEVEL_TO_STR(level) \
+ (((level) == ERROR) \
+ ? "ERROR" \
+ : ((level) == WARNING) \
+ ? "WARNING" \
+ : ((level) == INFO) ? "INFO" : ((level) == DEBUG) ? "DEBUG" : "DEFAULT")
+#define TFLITE_NATIVE_LOG(log_level, format, args...) \
+ do \
+ { \
+ printf("%s: %s: ", LEVEL_TO_STR(log_level), __FILE__); \
+ printf(format, ##args); \
+ printf("\n"); \
+ } while (0)
+#endif // __TIZEN__
+
+#ifdef __cplusplus
+}
+#endif /*__cplusplus*/
+
+#endif /*_TFLITE_LOG_H_*/
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
new file mode 100644
index 000000000..af1947ff0
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/include/tflite_nativewrapper.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TFLITE_NATIVEWRAPPER_H_
+#define _TFLITE_NATIVEWRAPPER_H_
+
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/string_util.h"
+#include "tensorflow/lite/tools/mutable_op_resolver.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /*__cplusplus*/
+
+typedef enum {
+ /** 32-bit signed integer. */
+ INT32 = 1,
+
+ /** 32-bit single precision floating point. */
+ FLOAT32 = 2,
+
+ /** 8-bit unsigned integer. */
+ UINT8 = 3,
+
+ /** 64-bit signed integer. */
+ INT64 = 4
+} TFLiteNativeType;
+
+void tflite_interpreter_setNumThreads(long *interpreterHandle, int numThreads);
+
+long long tflite_flatbuffermodel_BuildFromFile(char *modelPath);
+
+long long tflite_builder_interpreterBuilder(long *modelHandle);
+
+void *tflite_interpreter_run(long *interpreterHandle, void *values, int inputLength, int dataType);
+
+#ifdef __cplusplus
+}
+#endif /*__cplusplus*/
+
+#endif /*_TFLITE_NATIVEWRAPPER_H_*/
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp b/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp
new file mode 100644
index 000000000..0304720f7
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/src/tflite_nativewrapper.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include "tflite_nativewrapper.h"
+#include "tflite_log.h"
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+
+int getNumBytes(TFLiteNativeType dataType)
+{
+ switch (dataType)
+ {
+ case INT32:
+ return 4;
+ case FLOAT32:
+ return 4;
+ case UINT8:
+ return 1;
+ case INT64:
+ return 8;
+ default:
+ return 1;
+ }
+}
+
+/// <summary>
+/// Set the number of threads available to the interpreter.
+/// </summary>
+/// <param name="interpreterHandle">Handle of the interpreter instance.</param>
+/// <param name="numThreads">Number of threads.</param>
+void tflite_interpreter_setNumThreads(long *interpreterHandle, int numThreads)
+{
+ assert(interpreterHandle != nullptr);
+ tflite::Interpreter *interpreter = reinterpret_cast<tflite::Interpreter *>(*interpreterHandle);
+
+ interpreter->SetNumThreads(numThreads);
+
+ TFLITE_NATIVE_LOG(DEBUG, "Number of threads: %d", numThreads);
+ return;
+}
+
+/// <summary>
+/// Creates a Flat Buffer Model from the given .tflite model.
+/// </summary>
+/// <param name="modelPath">Path of the model.</param>
+long long tflite_flatbuffermodel_BuildFromFile(char *modelPath)
+{
+ if (modelPath == nullptr)
+ {
+ TFLITE_NATIVE_LOG(ERROR, "Invalid parameter");
+ return 0;
+ }
+ TFLITE_NATIVE_LOG(ERROR, "Model Path: %s", modelPath);
+
+ if (access(modelPath, F_OK) == -1)
+ {
+ TFLITE_NATIVE_LOG(ERROR, "Failed to access model [%s]", strerror(errno));
+ return 0;
+ }
+
+  auto model = tflite::FlatBufferModel::BuildFromFile(modelPath);
+  if (model == nullptr)
+  {
+    TFLITE_NATIVE_LOG(ERROR, "Failed to build model from file [%s]", modelPath);
+    return 0;
+  }
+
+  TFLITE_NATIVE_LOG(DEBUG, "Successfully loaded model");
+  return reinterpret_cast<long long>(model.release());
+
+/// <summary>
+/// Creates an interpreter instance taking the flatbuffer model as input.
+/// </summary>
+/// <param name="modelHandle">Address of the flatbuffer model.</param>
+long long tflite_builder_interpreterBuilder(long *modelHandle)
+{
+ assert(modelHandle != nullptr);
+ tflite::FlatBufferModel *model = reinterpret_cast<tflite::FlatBufferModel *>(*modelHandle);
+
+ tflite::ops::builtin::BuiltinOpResolver resolver;
+ std::unique_ptr<tflite::Interpreter> interpreter;
+
+ TfLiteStatus status = tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+
+ if (status != kTfLiteOk)
+ {
+ TFLITE_NATIVE_LOG(DEBUG, "Cannot create interpreter");
+ return 0;
+ }
+ TFLITE_NATIVE_LOG(DEBUG, "CheckPoint interpreter");
+ return reinterpret_cast<long>(interpreter.release());
+}
+
+/// <summary>
+/// Runs the inference given the inputs.
+/// </summary>
+/// <param name="interpreterHandle">Address of the interpreter instance.</param>
+/// <param name="values">Input values for the model.</param>
+/// <param name="inpLength">Length of the input.</param>
+/// <param name="dataType">Data type key of the input.</param>
+void *tflite_interpreter_run(long *interpreterHandle, void *values, int inputLength, int dataType)
+{
+ assert(interpreterHandle != nullptr);
+ tflite::Interpreter *interpreter = reinterpret_cast<tflite::Interpreter *>(*interpreterHandle);
+
+ int inputTensorIndex = interpreter->inputs()[0];
+
+ // TODO:: input tensor size will be passed as a parameter. It is hardcoded for now.
+ interpreter->ResizeInputTensor(inputTensorIndex, {1, 224, 224, 3});
+
+ if (interpreter->AllocateTensors() != kTfLiteOk)
+ {
+ TFLITE_NATIVE_LOG(ERROR, "Failed to allocate tensors!");
+ return nullptr;
+ }
+
+ float *inputTensorPointer = interpreter->typed_tensor<float>(inputTensorIndex);
+
+ int numBytes = getNumBytes((TFLiteNativeType)dataType);
+
+ memcpy(inputTensorPointer, values, inputLength * numBytes);
+
+  if (interpreter->Invoke() != kTfLiteOk)
+  {
+    TFLITE_NATIVE_LOG(ERROR, "Failed to invoke");
+    return nullptr;
+  }
+
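+  // Note: the returned buffer is owned by the interpreter and remains valid only
+  // until the next AllocateTensors()/Invoke() call, so callers should copy it.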
+ float *output = interpreter->typed_output_tensor<float>(0);
+ return output;
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteNative/tflite-native.pc.in b/runtime/contrib/TFLiteSharp/TFLiteNative/tflite-native.pc.in
new file mode 100644
index 000000000..eec103acc
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteNative/tflite-native.pc.in
@@ -0,0 +1,13 @@
+# Package Information for pkg-config
+
+prefix=@PREFIX@
+exec_prefix=/usr
+libdir=@LIB_INSTALL_DIR@
+includedir=@INCLUDE_INSTALL_DIR@/
+
+Name: @PC_NAME@
+Description: @PACKAGE_DESCRIPTION@
+Version: @VERSION@
+Requires: @PC_REQUIRED@ tensorflow-lite
+Libs: -L${libdir} @PC_LDFLAGS@
+Cflags: -I${includedir}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp.sln b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp.sln
new file mode 100644
index 000000000..985466cef
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp.sln
@@ -0,0 +1,25 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.26730.16
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TFLiteSharp", "TFLiteSharp\TFLiteSharp.csproj", "{22D47176-D5AD-4AD4-8867-8788139DF71C}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {22D47176-D5AD-4AD4-8867-8788139DF71C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {22D47176-D5AD-4AD4-8867-8788139DF71C}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {22D47176-D5AD-4AD4-8867-8788139DF71C}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {22D47176-D5AD-4AD4-8867-8788139DF71C}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {1B276F69-8E79-4501-AF04-6D340690762B}
+ EndGlobalSection
+EndGlobal
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.Libraries.cs b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.Libraries.cs
new file mode 100644
index 000000000..db8d9f612
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.Libraries.cs
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+internal static partial class Interop
+{
+ internal static partial class Libraries
+ {
+ public const string TFLite = "libtflite-native.so";
+ }
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.TFLite.cs b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.TFLite.cs
new file mode 100644
index 000000000..c7c7b24aa
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/Interop/Interop.TFLite.cs
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Runtime.InteropServices;
+
+internal static partial class Interop
+{
+ internal static partial class TFLite
+ {
+ [DllImport(Libraries.TFLite, EntryPoint = "tflite_flatbuffermodel_BuildFromFile")]
+ internal static extern IntPtr TFLiteFlatBufferModelBuildFromFile(string path);
+
+ [DllImport(Libraries.TFLite, EntryPoint = "tflite_builder_interpreterBuilder")]
+ internal static extern IntPtr TFLiteBuilderInterpreterBuilder(ref IntPtr modelHandle);
+
+ [DllImport(Libraries.TFLite, EntryPoint = "tflite_interpreter_setNumThreads")]
+        internal static extern void TFLiteInterpreterSetNumThreads(ref IntPtr interpreterHandle, int numThreads);
+
+ [DllImport(Libraries.TFLite, EntryPoint = "tflite_interpreter_run")]
+ internal static extern IntPtr TFLiteInterpreterRun(ref IntPtr interpreterHandle, IntPtr values, int inpLen, int dataType);
+
+ }
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/TFLiteSharp.csproj b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/TFLiteSharp.csproj
new file mode 100644
index 000000000..e0490bfb8
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/TFLiteSharp.csproj
@@ -0,0 +1,52 @@
+<Project>
+ <Import Project="Sdk.props" Sdk="Microsoft.NET.Sdk" />
+
+ <PropertyGroup Label="Globals">
+ <TizenProjectExtensionsPath>$(MSBuildExtensionsPath)\Tizen\VisualStudio\</TizenProjectExtensionsPath>
+ </PropertyGroup>
+
+ <Import Project="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props" Condition="Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props')" />
+
+ <PropertyGroup>
+ <OutputType>Library</OutputType>
+ <TargetFramework>netstandard2.0</TargetFramework>
+ </PropertyGroup>
+
+ <!--
+ This Property Group for msbuild command line.
+ If project build on Visual Studio, it would be set automatically through the certificate manager.
+ <PropertyGroup>
+ <AuthorPath>author_test.p12</AuthorPath>
+ <AuthorPass>author_test</AuthorPass>
+ <DistributorPath>tizen-distributor-signer.p12</DistributorPath>
+ <DistributorPass>tizenpkcs12passfordsigner</DistributorPass>
+ </PropertyGroup>
+ -->
+
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugType>portable</DebugType>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>None</DebugType>
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Remove="res\**" />
+ <EmbeddedResource Remove="res\**" />
+ <None Remove="res\**" />
+ </ItemGroup>
+
+ <ItemGroup>
+ <Folder Include="Interop\" />
+ <Folder Include="lib\" />
+ </ItemGroup>
+
+ <Import Project="Sdk.targets" Sdk="Microsoft.NET.Sdk" />
+ <Import Project="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets" Condition="Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets')" />
+
+ <!-- Install Check 'Visual Studio for Tizen' for developing on Visual Studio -->
+ <Target Name="TizenVsixInstallCheck" BeforeTargets="CompileDesignTime">
+ <Warning Condition="!Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props')" Text="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props is not exist.&#xA; you need to check if 'Visual Studio for Tizen' is installed" />
+ <Warning Condition="!Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets')" Text="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets is not exist.\&#xA; you need to check if 'Visual Studio for Tizen' is installed" />
+ </Target>
+</Project>
+
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Datatype.cs b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Datatype.cs
new file mode 100644
index 000000000..404d1663e
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Datatype.cs
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Type of elements in a {@link TfLiteTensor}. */
+enum DataType
+{
+ /** 32-bit signed integer. */
+ INT32 = 1,
+
+ /** 32-bit single precision floating point. */
+ FLOAT32 = 2,
+
+ /** 8-bit unsigned integer. */
+ UINT8 = 3,
+
+ /** 64-bit signed integer. */
+ INT64 = 4
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Interpreter.cs b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Interpreter.cs
new file mode 100644
index 000000000..f1b4a8e07
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharp/TFLiteSharp/src/Interpreter.cs
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace TFLite
+{
+
+ /// <summary>
+    /// Driver class to drive model inference with TensorFlow Lite. An Interpreter
+    /// encapsulates a pre-trained model file on which the operations are performed.
+    /// @class Interpreter
+ /// </summary>
+ public class Interpreter : IDisposable
+ {
+ // Handle to hold the model instance
+ private IntPtr m_modelHandle;
+ // Handle to hold the interpreter instance
+ private IntPtr m_interpreterHandle;
+
+ /// <summary>
+        /// Interpreter Constructor. Initializes an interpreter.
+        /// </summary>
+        ///<param name="modelPath">Path to a pre-trained TF Lite model file.</param>
+ public Interpreter(string modelPath)
+ {
+ //Constructor to initialize the interpreter with a model file
+ m_modelHandle = Interop.TFLite.TFLiteFlatBufferModelBuildFromFile(modelPath);
+ if(m_modelHandle == IntPtr.Zero)
+ {
+ //TODO: routine for handling null pointer.
+ }
+ m_interpreterHandle = Interop.TFLite.TFLiteBuilderInterpreterBuilder(ref m_modelHandle);
+ if (m_interpreterHandle == IntPtr.Zero)
+ {
+ //TODO: routine for handling null pointer.
+ }
+ }
+
+ /// <summary>
+ /// Set the number of threads available to the interpreter.
+ /// </summary>
+ /// <param name="numThreads">Number of threads.</param>
+ public void SetNumThreads(int numThreads)
+ {
+            Interop.TFLite.TFLiteInterpreterSetNumThreads(ref m_interpreterHandle, numThreads);
+ return;
+ }
+
+ /// <summary>
+ /// Runs model inference if the model takes only one input, and provides only
+ /// one output.
+ /// </summary>
+ /// <param name="input">input an array or multidimensional array.</param>
+ /// <param name="output">outputs a multidimensional array of output data.</param>
+ public void Run(Array input, ref Array output)
+ {
+ Array[] inputs = { input };
+ Dictionary<int, Array> outputs = new Dictionary<int, Array>();
+
+ RunForMultipleInputsOutputs(inputs, ref outputs);
+ output = outputs[0];
+
+ return;
+ }
+
+ /// <summary>
+ /// Runs model inference if the model takes multiple inputs, or returns multiple
+ /// outputs.
+ /// </summary>
+ /// <param name="inputs">input an array of input data.</param>
+ /// <param name="outputs">outputs a map mapping output indices to multidimensional
+ /// arrays of output data.</param>
+ public void RunForMultipleInputsOutputs(Array[] inputs, ref Dictionary<int, Array> outputs)
+ {
+ if(m_interpreterHandle == IntPtr.Zero)
+ {
+ //TODO:: exception handling
+ }
+
+ if (inputs == null || inputs.Length == 0)
+ {
+ //TODO::throw new IllegalArgumentException("Input error: Inputs should not be null or empty.");
+ }
+
+ DataType[] dataTypes = new DataType[inputs.Length];//To be used in multi-dimensional case
+
+ for (int i = 0; i < inputs.Length; ++i)
+ {
+ dataTypes[i] = DataTypeOf(inputs[i]);
+ }
+
+            //TODO:: Support for multi-dimensional arrays to be added.
+            // Allocate room for all elements, not just the element count (avoids overrun in Marshal.Copy).
+            int elementSize = (dataTypes[0] == DataType.UINT8) ? 1 : (dataTypes[0] == DataType.INT64) ? 8 : 4;
+            IntPtr pnt = Marshal.AllocHGlobal(inputs[0].Length * elementSize);
+
+ switch (dataTypes[0])
+ {
+ case DataType.INT32:
+ Marshal.Copy((int[])inputs[0], 0, pnt, inputs[0].Length);
+ break;
+ case DataType.FLOAT32:
+ Marshal.Copy((float[])inputs[0], 0, pnt, inputs[0].Length);
+ break;
+ case DataType.UINT8:
+ Marshal.Copy((byte[])inputs[0], 0, pnt, inputs[0].Length);
+ break;
+ case DataType.INT64:
+ Marshal.Copy((long[])inputs[0], 0, pnt, inputs[0].Length);
+ break;
+ default:
+ Marshal.Copy((byte[])inputs[0], 0, pnt, inputs[0].Length);
+ break;
+ }
+
+ //Currently this handles only single input with single dimension.
+ IntPtr outputsHandles = Interop.TFLite.TFLiteInterpreterRun(ref m_interpreterHandle, pnt, inputs[0].Length, (int)dataTypes[0]);
+
+            if (outputsHandles == IntPtr.Zero)
+ {
+ //throw new IllegalStateException("Internal error: Interpreter has no outputs.");
+ }
+
+ switch (dataTypes[0])
+ {
+ case DataType.INT32:
+ int[] managedArrayInt = new int[inputs[0].Length];
+ Marshal.Copy(outputsHandles, managedArrayInt, 0, inputs[0].Length);
+ outputs.Add(0, managedArrayInt);
+ break;
+ case DataType.FLOAT32:
+ float[] managedArrayFloat = new float[inputs[0].Length];
+ Marshal.Copy(outputsHandles, managedArrayFloat, 0, inputs[0].Length);
+ outputs.Add(0, managedArrayFloat);
+ break;
+ case DataType.UINT8:
+ byte[] managedArrayByte = new byte[inputs[0].Length];
+ Marshal.Copy(outputsHandles, managedArrayByte, 0, inputs[0].Length);
+ outputs.Add(0, managedArrayByte);
+ break;
+ case DataType.INT64:
+ long[] managedArrayLong = new long[inputs[0].Length];
+ Marshal.Copy(outputsHandles, managedArrayLong, 0, inputs[0].Length);
+ outputs.Add(0, managedArrayLong);
+ break;
+ default:
+ byte[] managedArrayDefault = new byte[inputs[0].Length];
+ Marshal.Copy(outputsHandles, managedArrayDefault, 0, inputs[0].Length);
+ outputs.Add(0, managedArrayDefault);
+ break;
+ }
+            Marshal.FreeHGlobal(pnt);
+            return;
+ }
+
+ static DataType DataTypeOf(Array a)
+ {
+ if (a.GetValue(0).GetType()==typeof(int))
+ {
+ return DataType.INT32;
+ }
+ else if (a.GetValue(0).GetType() == typeof(float))
+ {
+ return DataType.FLOAT32;
+ }
+ else if (a.GetValue(0).GetType() == typeof(byte))
+ {
+ return DataType.UINT8;
+ }
+ else if(a.GetValue(0).GetType() == typeof(long))
+ {
+ return DataType.INT64;
+ }
+ else
+ {
+ return DataType.UINT8;
+ //TODO: throw exception
+ }
+
+ }
+
+ /// <summary>
+ /// Resizes idx-th input of the native model to the given dims.
+ /// </summary>
+ /// <param name="idx">index of the input.</param>
+ /// <param name="dims">Dimensions to which input needs to be resized.</param>
+ public void ResizeInput(int idx, int[] dims)
+ {
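+            // TODO: not yet wired to the native wrapper; currently a no-op.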
+ return;
+ }
+
+ /// <summary>
+ /// Gets index of an input given the tensor name of the input.
+ /// </summary>
+ /// <param name="tensorName">Name of the tensor.</param>
+ public int GetInputIndex(string tensorName)
+ {
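+            // TODO: tensor-name lookup is not yet implemented in the native wrapper; always returns 0.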
+ return 0;
+ }
+
+ /// <summary>
+ /// Gets index of output given the tensor name of the input.
+ /// </summary>
+ /// <param name="tensorName">Name of the tensor.</param>
+ public int GetOutputIndex(string tensorName)
+ {
+ return 0;
+ }
+
+ /// <summary>
+ /// Turns on/off Android NNAPI for hardware acceleration when it is available.
+ /// </summary>
+ /// <param name="useNNAPI">set the boolean value to turn on/off nnapi.</param>
+ public void SetUseNNAPI(bool useNNAPI)
+ {
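+            // TODO: NNAPI toggling is not yet implemented in the native wrapper; currently a no-op.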
+ return;
+ }
+
+ /// <summary>
+ /// Release resources associated with the Interpreter.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool bDisposing)
+ {
+ if (m_interpreterHandle != IntPtr.Zero)
+ {
+                // TODO: free the native interpreter via the native layer; for now just clear the handle
+ m_interpreterHandle = IntPtr.Zero;
+ }
+
+ if (bDisposing)
+ {
+ // No need to call the finalizer since we've now cleaned
+ // up the unmanaged memory
+ GC.SuppressFinalize(this);
+ }
+ }
+
+ // This finalizer is called when Garbage collection occurs, but only if
+ // the IDisposable.Dispose method wasn't already called.
+ ~Interpreter()
+ {
+ Dispose(false);
+ }
+ }
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest.sln b/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest.sln
new file mode 100644
index 000000000..e260a72c7
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest.sln
@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 15
+VisualStudioVersion = 15.0.26730.16
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TFLiteSharpTest", "TFLiteSharpTest\TFLiteSharpTest.csproj", "{D35A178F-9EF3-4B07-9E53-A91AA7A030B3}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TFLiteSharp", "..\TFLiteSharp\TFLiteSharp\TFLiteSharp.csproj", "{C06BC425-9BC3-43C7-A9D3-E12849E0C129}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {D35A178F-9EF3-4B07-9E53-A91AA7A030B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D35A178F-9EF3-4B07-9E53-A91AA7A030B3}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D35A178F-9EF3-4B07-9E53-A91AA7A030B3}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D35A178F-9EF3-4B07-9E53-A91AA7A030B3}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C06BC425-9BC3-43C7-A9D3-E12849E0C129}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C06BC425-9BC3-43C7-A9D3-E12849E0C129}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C06BC425-9BC3-43C7-A9D3-E12849E0C129}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C06BC425-9BC3-43C7-A9D3-E12849E0C129}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {8F946511-2BE4-40A5-A48C-A5684C62755D}
+ EndGlobalSection
+EndGlobal
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/Program.cs b/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/Program.cs
new file mode 100644
index 000000000..e559bec36
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/Program.cs
@@ -0,0 +1,38 @@
+using System;
+
+namespace TFLiteSharpTest
+{
+ class Program
+ {
+ static void Main(string[] args)
+ {
+ //Constructing a new interpreter instance from the modelfile
+ TFLite.Interpreter interpreter = new TFLite.Interpreter("modelpath/modelfile.tflite");
+ Console.WriteLine("Interpreter Built Successfully");
+
+ //Setting the number of threads of the interpreter
+ interpreter.SetNumThreads(1);
+
+ //Declaring input and output variables;
+ Array input = new int[5] { 1, 2, 3, 4, 5 };
+ Array output = new int[5];
+
+ //Call to invoke the interpreter and run the inference to populate output
+            interpreter.Run(input, ref output);
+ Console.WriteLine("Output generated Successfully");
+
+ //get input, output indices
+ Console.WriteLine("Input index for tensorname: " + interpreter.GetInputIndex("tensorname"));
+ Console.WriteLine("Output index for tensorname: " + interpreter.GetOutputIndex("tensorname"));
+
+ //Resizing the dimensions
+ int[] dims = new int[3] { 1, 2, 3 };
+ interpreter.ResizeInput(1, dims);
+
+ //Disposing the interpreter to free resources at the end
+ interpreter.Dispose();
+
+ Console.WriteLine("Run Complete");
+ }
+ }
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/TFLiteSharpTest.csproj b/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/TFLiteSharpTest.csproj
new file mode 100644
index 000000000..b143ee598
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteSharpTest/TFLiteSharpTest/TFLiteSharpTest.csproj
@@ -0,0 +1,12 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <TargetFramework>netcoreapp2.0</TargetFramework>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\..\TFLiteSharp\TFLiteSharp\TFLiteSharp.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp.csproj b/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp.csproj
new file mode 100644
index 000000000..1c9ed6037
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp.csproj
@@ -0,0 +1,54 @@
+<Project>
+ <Import Project="Sdk.props" Sdk="Microsoft.NET.Sdk" />
+
+ <!-- Setting Tizen Extension Path -->
+ <PropertyGroup Label="Globals">
+ <TizenProjectExtensionsPath>$(MSBuildExtensionsPath)\Tizen\VisualStudio\</TizenProjectExtensionsPath>
+ </PropertyGroup>
+
+ <!-- Import Tizen property in Tizen.NET SDK -->
+ <Import Project="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props" Condition="Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props')" />
+
+ <!-- Property Group for .NET Core Project -->
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <TargetFramework>netstandard2.0</TargetFramework>
+ </PropertyGroup>
+
+ <!-- Property Group for Tizen Project -->
+ <PropertyGroup>
+ <TizenCreateTpkOnBuild>true</TizenCreateTpkOnBuild>
+ <PackageTargetFallback>$(PackageTargetFallback);portable-net45+wp80+win81+wpa81</PackageTargetFallback>
+ </PropertyGroup>
+
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugType>portable</DebugType>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>None</DebugType>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <Folder Include="lib\" />
+ <Folder Include="res\" />
+ </ItemGroup>
+
+ <!-- NuGet packages required to build the Tizen project -->
+ <ItemGroup>
+ <PackageReference Include="Tizen.NET" Version="3.0.0" />
+ <PackageReference Include="Tizen.NET.Sdk" Version="1.0.1" />
+ <PackageReference Include="Xamarin.Forms.Platform.Tizen" Version="2.3.5-r256-001" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\TFLiteSharp\TFLiteSharp\TFLiteSharp.csproj" />
+ </ItemGroup>
+
+ <Import Project="Sdk.targets" Sdk="Microsoft.NET.Sdk" />
+ <Import Project="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets" Condition="Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets')" />
+
+ <!-- Check that 'Visual Studio for Tizen' is installed when developing in Visual Studio -->
+ <Target Name="TizenVsixInstallCheck" BeforeTargets="CompileDesignTime">
+ <Warning Condition="!Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props')" Text="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.props does not exist.&#xA;Please check that 'Visual Studio for Tizen' is installed." />
+ <Warning Condition="!Exists('$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets')" Text="$(TizenProjectExtensionsPath)Tizen.NET.ProjectType.targets does not exist.&#xA;Please check that 'Visual Studio for Tizen' is installed." />
+ </Target>
+</Project>
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_App.cs b/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_App.cs
new file mode 100644
index 000000000..49a08604d
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_App.cs
@@ -0,0 +1,65 @@
+using System;
+using System.IO;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+using Xamarin.Forms;
+
+namespace TFLiteTestApp
+{
+ public class App : Application
+ {
+ public App()
+ {
+ TFLite.Interpreter interpreter = null;
+ try
+ {
+ interpreter = new TFLite.Interpreter(Tizen.Applications.Application.Current.DirectoryInfo.Resource + "mobilenet_v1_1.0_224.tflite");
+ }
+ catch (Exception e)
+ {
+ Tizen.Log.Debug("tflite", "Error: " + e);
+ }
+
+ // Only run inference if the interpreter was constructed successfully
+ if (interpreter != null)
+ {
+ Tizen.Log.Debug("tflite", "Interpreter initialised");
+ Array output = new byte[1000];
+
+ // Read the raw image bytes that serve as the model input
+ Array input = File.ReadAllBytes(Tizen.Applications.Application.Current.DirectoryInfo.Resource + "mouse_224.bmp");
+
+ interpreter.Run(input, ref output);
+ // Read the first element to confirm that the output buffer was populated
+ byte val = ((byte[])output)[0];
+ }
+
+ // The root page of your application
+ MainPage = new ContentPage
+ {
+ Content = new StackLayout
+ {
+ VerticalOptions = LayoutOptions.Center,
+ Children = {
+ new Label {
+ HorizontalTextAlignment = TextAlignment.Center,
+ Text = "Welcome to Xamarin Forms!"
+ }
+ }
+ }
+ };
+ }
+
+ protected override void OnStart()
+ {
+ // Handle when your app starts
+ }
+
+ protected override void OnSleep()
+ {
+ // Handle when your app sleeps
+ }
+
+ protected override void OnResume()
+ {
+ // Handle when your app resumes
+ }
+ }
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_Main.cs b/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_Main.cs
new file mode 100644
index 000000000..2a8f747a4
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/TFLiteTestApp_Main.cs
@@ -0,0 +1,20 @@
+using System;
+
+namespace TFLiteTestApp
+{
+ class Program : global::Xamarin.Forms.Platform.Tizen.FormsApplication
+ {
+ protected override void OnCreate()
+ {
+ base.OnCreate();
+ LoadApplication(new App());
+ }
+
+ static void Main(string[] args)
+ {
+ var app = new Program();
+ global::Xamarin.Forms.Platform.Tizen.Forms.Init(app);
+ app.Run(args);
+ }
+ }
+}
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mobilenet_v1_1.0_224.tflite b/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mobilenet_v1_1.0_224.tflite
new file mode 100644
index 000000000..d34691eb6
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mobilenet_v1_1.0_224.tflite
Binary files differ
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse1.bmp b/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse1.bmp
new file mode 100644
index 000000000..1734ca318
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse1.bmp
Binary files differ
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse_224.bmp b/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse_224.bmp
new file mode 100644
index 000000000..ccfed6ad3
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/res/mouse_224.bmp
Binary files differ
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/shared/res/TFLiteTestApp.png b/runtime/contrib/TFLiteSharp/TFLiteTestApp/shared/res/TFLiteTestApp.png
new file mode 100644
index 000000000..9f3cb9860
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/shared/res/TFLiteTestApp.png
Binary files differ
diff --git a/runtime/contrib/TFLiteSharp/TFLiteTestApp/tizen-manifest.xml b/runtime/contrib/TFLiteSharp/TFLiteTestApp/tizen-manifest.xml
new file mode 100644
index 000000000..62a8d4c7c
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/TFLiteTestApp/tizen-manifest.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns="http://tizen.org/ns/packages" api-version="4" package="org.tizen.example.TFLiteTestApp" version="1.0.0">
+ <profile name="common" />
+ <ui-application appid="org.tizen.example.TFLiteTestApp"
+ exec="TFLiteTestApp.dll"
+ type="dotnet"
+ multiple="false"
+ taskmanage="true"
+ nodisplay="false"
+ launch_mode="single">
+ <label>TFLiteTestApp</label>
+ <icon>TFLiteTestApp.png</icon>
+ </ui-application>
+</manifest>
diff --git a/runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.manifest b/runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.manifest
new file mode 100644
index 000000000..75b0fa5e3
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.manifest
@@ -0,0 +1,5 @@
+<manifest>
+ <request>
+ <domain name="_"/>
+ </request>
+</manifest>
diff --git a/runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.spec b/runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.spec
new file mode 100644
index 000000000..dcb65a864
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/packaging/TFLiteSharp.spec
@@ -0,0 +1,103 @@
+Name: TFLiteSharp
+Summary: Tensorflow lite native cpp wrapper and C# API
+Version: 1.0.0
+Release: 1
+Group: Development/Libraries
+License: Apache-2.0
+Source0: %{name}-%{version}.tar.gz
+Source1: %{name}.manifest
+Source2: tflite-native.manifest
+
+%description
+%{summary}
+
+%package TFLiteNative
+Summary: Tensorflow lite native cpp wrapper
+Group: Development/Libraries
+BuildRequires: cmake
+BuildRequires: pkgconfig(dlog)
+BuildRequires: pkgconfig(tensorflow-lite)
+Requires(post): /sbin/ldconfig
+Requires(postun): /sbin/ldconfig
+
+%description TFLiteNative
+Native CPP Wrapper for Tensorflow lite
+
+%package TFLiteNative-devel
+Summary: Tensorflow lite native cpp wrapper (Development)
+Requires: %{name} = %{version}-%{release}
+
+%description TFLiteNative-devel
+Tensorflow lite native cpp wrapper (Development)
+
+%package TFLiteSharp
+Summary: Tensorflow lite API for C#
+Group: Development/Libraries
+AutoReqProv: no
+ExcludeArch: aarch64
+
+BuildRequires: dotnet-build-tools
+
+%define Assemblies TFLiteSharp
+
+%description TFLiteSharp
+Tensorflow lite API for C#
+
+%dotnet_import_sub_packages
+
+%prep
+%setup -q
+cp %{SOURCE1} .
+cp %{SOURCE2} .
+%if 0%{?tizen:1}
+%define TARGET_OS tizen
+%else
+%define TARGET_OS linux
+%endif
+
+%build
+MAJORVER=`echo %{version} | awk 'BEGIN {FS="."}{print $1}'`
+%if "%{TARGET_OS}" == "tizen"
+cmake VERBOSE=1 -DCMAKE_INSTALL_PREFIX=/usr -DFULLVER=%{version} -DMAJORVER=${MAJORVER} \
+ -DLIB_INSTALL_DIR=%{_libdir} -DINCLUDE_INSTALL_DIR=%{_includedir} \
+ -DLIB_PATH=%{_lib} -DTIZEN=1 contrib/TFLiteSharp/TFLiteNative
+%else
+cmake VERBOSE=1 -DCMAKE_INSTALL_PREFIX=/usr -DFULLVER=%{version} -DMAJORVER=${MAJORVER} \
+ -DLIB_INSTALL_DIR=%{_libdir} -DINCLUDE_INSTALL_DIR=%{_includedir} \
+ -DLIB_PATH=%{_lib} contrib/TFLiteSharp/TFLiteNative
+%endif
+
+make %{?_smp_mflags}
+
+cd contrib/TFLiteSharp/
+for ASM in %{Assemblies}; do
+%dotnet_build $ASM
+%dotnet_pack $ASM
+done
+
+%install
+%make_install
+cd contrib/TFLiteSharp/TFLiteSharp
+for ASM in %{Assemblies}; do
+%dotnet_install $ASM
+done
+
+%post -p /sbin/ldconfig
+
+%postun -p /sbin/ldconfig
+
+%files
+%manifest %{name}.manifest
+%license LICENSE
+
+%files TFLiteNative
+%manifest tflite-native.manifest
+%{_libdir}/libtflite-native.so*
+
+%files TFLiteNative-devel
+%{_includedir}/*
+%{_libdir}/pkgconfig/tflite-native.pc
+%{_libdir}/libtflite-native.so*
+
+%files TFLiteSharp
+%attr(644,root,root) %{dotnet_assembly_files}
diff --git a/runtime/contrib/TFLiteSharp/packaging/tflite-native.manifest b/runtime/contrib/TFLiteSharp/packaging/tflite-native.manifest
new file mode 100644
index 000000000..75b0fa5e3
--- /dev/null
+++ b/runtime/contrib/TFLiteSharp/packaging/tflite-native.manifest
@@ -0,0 +1,5 @@
+<manifest>
+ <request>
+ <domain name="_"/>
+ </request>
+</manifest>
diff --git a/runtime/contrib/android_benchmark_app/AndroidManifest.xml b/runtime/contrib/android_benchmark_app/AndroidManifest.xml
new file mode 100644
index 000000000..f0b967cfa
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/AndroidManifest.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+ package="com.ndk.tflbench" >
+
+ <application
+ android:allowBackup="true"
+ android:icon="@drawable/ic_launcher"
+ android:label="@string/app_name"
+ android:theme="@style/AppTheme" >
+ <activity
+ android:name="com.ndk.tflbench.MainActivity"
+ android:label="@string/app_name" >
+ <intent-filter>
+ <action android:name="android.intent.action.MAIN" />
+
+ <category android:name="android.intent.category.LAUNCHER" />
+ </intent-filter>
+ </activity>
+ </application>
+
+</manifest>
diff --git a/runtime/contrib/android_benchmark_app/CMakeLists.txt b/runtime/contrib/android_benchmark_app/CMakeLists.txt
new file mode 100644
index 000000000..8e9d3c7a1
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/CMakeLists.txt
@@ -0,0 +1,97 @@
+if(NOT BUILD_ANDROID_BENCHMARK_APP)
+ return()
+endif(NOT BUILD_ANDROID_BENCHMARK_APP)
+
+if(NOT ANDROID)
+ message(STATUS "Sample app is disabled as non-Android build")
+ return()
+endif()
+
+if(NOT DEFINED ANDROID_BUILD_TOOLS_DIR)
+ message(STATUS "Sample app is disabled as ANDROID_BUILD_TOOLS_DIR is not defined")
+ return()
+endif()
+
+if(NOT DEFINED ANDROID_SDK_DIR)
+ message(STATUS "Sample app is disabled as ANDROID_SDK_DIR is not defined")
+ return()
+endif()
+
+if(NOT DEFINED TFLITE_MODEL_PATH)
+ message(STATUS "Sample app is disabled as TFLITE_MODEL_PATH is not defined")
+ return()
+endif()
+
+nnas_find_package(ARMCompute REQUIRED)
+
+if(NOT CORE_LIBRARY OR NOT RUNTIME_LIBRARY)
+ message(STATUS "Sample app is disabled as ARM Compute Library is missing")
+ return()
+endif()
+
+get_filename_component(TFLITE_MODEL_FILENAME ${TFLITE_MODEL_PATH} NAME)
+
+set(ANDROID_API_VERSION 27)
+set(ANDROID_PLATFORM_DIR ${ANDROID_SDK_DIR}/platforms/android-${ANDROID_API_VERSION})
+
+set(GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/gen)
+set(OBJ_DIR ${CMAKE_CURRENT_BINARY_DIR}/obj)
+set(PKG_DIR ${CMAKE_CURRENT_BINARY_DIR}/pkg)
+set(LIB_DIR ${PKG_DIR}/lib/arm64-v8a)
+set(ASSETS_DIR ${PKG_DIR}/assets)
+set(SRC_LIST ${CMAKE_CURRENT_BINARY_DIR}/src.list)
+
+if (ANDROID_BOOST_ROOT)
+ set(BOOST_ROOT ${ANDROID_BOOST_ROOT})
+endif (ANDROID_BOOST_ROOT)
+
+nnas_find_package(Boost REQUIRED)
+
+add_library(android_benchmark_native SHARED cpp/ndk_main.cpp)
+target_compile_definitions(android_benchmark_native PRIVATE MODEL_NAME="${TFLITE_MODEL_FILENAME}")
+target_include_directories(android_benchmark_native PRIVATE ${NNAS_EXTERNALS_DIR}/tensorflow)
+target_include_directories(android_benchmark_native PRIVATE ${Boost_INCLUDE_DIRS})
+target_link_libraries(android_benchmark_native nnfw_lib_tflite)
+target_link_libraries(android_benchmark_native nnfw_lib_misc)
+target_link_libraries(android_benchmark_native log)
+
+nnfw_find_package(FlatBuffersSource REQUIRED)
+target_include_directories(android_benchmark_native PUBLIC ${FlatBuffersSource_DIR}/include .)
+
+add_custom_target(android-benchmark-apk ALL
+ COMMAND ${CMAKE_COMMAND} -E remove_directory ${GEN_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${GEN_DIR}
+ COMMAND ${ANDROID_BUILD_TOOLS_DIR}/aapt package -m -J ${GEN_DIR}
+ -M ${CMAKE_CURRENT_SOURCE_DIR}/AndroidManifest.xml
+ -S ${CMAKE_CURRENT_SOURCE_DIR}/res
+ -I ${ANDROID_PLATFORM_DIR}/android.jar
+ COMMAND ${CMAKE_COMMAND} -E remove_directory ${OBJ_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${OBJ_DIR}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${SRC_LIST}
+ COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/java -name '*.java' >> ${SRC_LIST}
+ COMMAND find ${GEN_DIR} -name '*.java' >> ${SRC_LIST}
+ COMMAND javac -d ${OBJ_DIR} -source 1.7 -target 1.7 -bootclasspath "${JAVA_HOME}/jre/lib/rt.jar"
+ -classpath ${ANDROID_PLATFORM_DIR}/android.jar @${SRC_LIST}
+ COMMAND ${CMAKE_COMMAND} -E remove_directory ${PKG_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${PKG_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${ASSETS_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy ${TFLITE_MODEL_PATH} ${ASSETS_DIR}/model.tflite
+ COMMAND ${ANDROID_BUILD_TOOLS_DIR}/dx --dex --output=${PKG_DIR}/classes.dex ${OBJ_DIR}
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy ${CORE_LIBRARY} ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy ${RUNTIME_LIBRARY} ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:android_benchmark_native> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun_backend_acl_cl> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:arm_compute_ex> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun_backend_acl_neon> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:neurun_backend_cpu> ${LIB_DIR}
+ COMMAND ${CMAKE_COMMAND} -E echo ${ANDROID_BUILD_TOOLS_DIR}/aapt package -f -0 tflite -M ${CMAKE_CURRENT_SOURCE_DIR}/AndroidManifest.xml -S ${CMAKE_CURRENT_SOURCE_DIR}/res/ -I ${ANDROID_PLATFORM_DIR}/android.jar -F ${CMAKE_CURRENT_BINARY_DIR}/android-benchmark.unsigned.pkg ${PKG_DIR}
+ COMMAND ${ANDROID_BUILD_TOOLS_DIR}/aapt package -f
+ -0 tflite
+ -M ${CMAKE_CURRENT_SOURCE_DIR}/AndroidManifest.xml
+ -S ${CMAKE_CURRENT_SOURCE_DIR}/res/
+ -I ${ANDROID_PLATFORM_DIR}/android.jar
+ -F ${CMAKE_CURRENT_BINARY_DIR}/android-benchmark.unsigned.pkg
+ ${PKG_DIR}
+ DEPENDS android_benchmark_native neurun neurun_backend_acl_cl neurun_backend_acl_neon neurun_backend_cpu)
diff --git a/runtime/contrib/android_benchmark_app/README.md b/runtime/contrib/android_benchmark_app/README.md
new file mode 100644
index 000000000..2868e0ada
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/README.md
@@ -0,0 +1,58 @@
+# Android Benchmark App
+
+An Android sample app that runs a `.tflite` model and measures its performance.
+
+You can run the model with either of two engines:
+
+- Tensorflow Lite Interpreter
+- NN API Delegate (neurun)
+
+## Build
+
+In addition to the usual aarch64 Android build settings, you need to specify a few more parameters:
+
+- `ANDROID_BUILD_TOOLS_DIR` : Android `build-tools` directory (found under the Android SDK directory)
+- `ANDROID_SDK_DIR` : Android SDK directory
+- `TFLITE_MODEL_PATH` : the model to run (only one model can be packed)
+- `ANDROID_BOOST_ROOT` : Boost library root path
+ - This directory should contain `lib` and `include` subdirectories
+ - To build Boost for Android, use [this repo](https://github.com/moritz-wundke/Boost-for-Android)
+
+Example:
+
+```bash
+make TARGET_OS=android \
+ CROSS_BUILD=1 \
+ BUILD_TYPE=RELEASE \
+ NDK_DIR=/home/hanjoung/ws/android-tools/r20/ndk \
+ EXT_ACL_FOLDER=/home/hanjoung/ws/temp/arm_compute-v19.05-bin-android/lib/android-arm64-v8a-neon-cl \
+ ANDROID_BUILD_TOOLS_DIR=/home/hanjoung/ws/android-tools/sdk/build-tools/27.0.3/ \
+ ANDROID_SDK_DIR=/home/hanjoung/ws/android-tools/sdk \
+ TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/framework/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
+ ANDROID_BOOST_ROOT=/home/hanjoung/ws/gh/moritz-wundke/Boost-for-Android/build/out/arm64-v8a
+```
+
+The build produces `obj/contrib/android_benchmark_app/android-benchmark.unsigned.pkg`, an unsigned Android app package.
+
+## Sign APK
+
+Before installing the package, you need to sign it. You will need:
+
+- `apksigner` : found in the `build-tools` directory
+- Your keystore : How-to is TBD
+
+```bash
+apksigner sign \
+ --ks ~/.android/debug.keystore \
+ --in Product/aarch64-android.release/obj/contrib/android_benchmark_app/android-benchmark.unsigned.pkg \
+ --out tflbench.apk
+```
+
+You will be prompted for the keystore password. The signed package is written to `tflbench.apk`.
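+
+You can sanity-check the signature before installing (assuming `apksigner` is on your `PATH`):
+
+```bash
+apksigner verify --print-certs tflbench.apk
+```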
+
+## Install APK
+
+```bash
+adb install tflbench.apk
+adb uninstall com.ndk.tflbench # To uninstall
+```
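+
+Once installed, you can launch the app from the device, or start it directly over `adb` (the component name follows from `AndroidManifest.xml`):
+
+```bash
+adb shell am start -n com.ndk.tflbench/.MainActivity
+```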
diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
new file mode 100644
index 000000000..f2ca1312c
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.cpp
@@ -0,0 +1,228 @@
+#include "ndk_main.h"
+
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+
+#include "tflite/Assert.h"
+#include "tflite/Session.h"
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+#include "tflite/ext/kernels/register.h"
+
+#include "misc/benchmark.h"
+
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/mean.hpp>
+#include <boost/accumulators/statistics/min.hpp>
+#include <boost/accumulators/statistics/max.hpp>
+
+#include <cassert>
+#include <chrono>
+#include <sstream>
+
+#include <android/log.h>
+
+using namespace tflite;
+using namespace tflite::ops::builtin;
+
+static StderrReporter error_reporter;
+
+static std::unique_ptr<FlatBufferModel> model;
+
+inline void setText(JNIEnv *env, jobject thisObj, const std::string &message)
+{
+ jclass thisClass = env->GetObjectClass(thisObj);
+ jmethodID setTextMethod = env->GetMethodID(thisClass, "setText", "(Ljava/lang/String;)V");
+
+ assert(setTextMethod != nullptr);
+
+ env->CallVoidMethod(thisObj, setTextMethod, env->NewStringUTF(message.c_str()));
+}
+
+inline void setTitle(JNIEnv *env, jobject thisObj, const std::string &message)
+{
+ jclass thisClass = env->GetObjectClass(thisObj);
+ jmethodID setTextMethod = env->GetMethodID(thisClass, "setTitle", "(Ljava/lang/String;)V");
+
+ assert(setTextMethod != nullptr);
+
+ env->CallVoidMethod(thisObj, setTextMethod, env->NewStringUTF(message.c_str()));
+
+ // Clear message
+ setText(env, thisObj, "");
+}
+
+inline void setText(JNIEnv *env, jobject thisObj, const std::stringstream &ss)
+{
+ setText(env, thisObj, ss.str());
+}
+
+inline std::unique_ptr<FlatBufferModel> loadModel(JNIEnv *env, jobject thisObj,
+ jobject model_buffer)
+{
+ const char *model_base = static_cast<char *>(env->GetDirectBufferAddress(model_buffer));
+ jlong model_size = env->GetDirectBufferCapacity(model_buffer);
+
+ return FlatBufferModel::BuildFromBuffer(model_base, static_cast<size_t>(model_size),
+ &error_reporter);
+}
+
+struct Activity
+{
+ virtual ~Activity() = default;
+
+ virtual void prepare(void) const = 0;
+ virtual void run(void) const = 0;
+ virtual void teardown(void) const = 0;
+};
+
+struct LiteActivity final : public Activity
+{
+public:
+ LiteActivity(nnfw::tflite::Session &sess) : _sess(sess)
+ {
+ // DO NOTHING
+ }
+
+public:
+ void prepare(void) const override { _sess.prepare(); }
+ void run(void) const override { _sess.run(); }
+ void teardown(void) const override { _sess.teardown(); }
+
+private:
+ nnfw::tflite::Session &_sess;
+};
+
+struct SimpleActivity final : public Activity
+{
+public:
+ SimpleActivity(const std::function<void(void)> &fn) : _fn{fn}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void prepare(void) const override {}
+ void run(void) const override { _fn(); }
+ void teardown(void) const override {}
+
+private:
+ std::function<void(void)> _fn;
+};
+
+inline void runBenchmark(JNIEnv *env, jobject thisObj, Activity &act)
+{
+ auto runTrial = [&](void) {
+ std::chrono::milliseconds elapsed(0);
+
+ act.prepare();
+ nnfw::misc::benchmark::measure(elapsed) << [&](void) { act.run(); };
+ act.teardown();
+
+ return elapsed;
+ };
+
+ // Warm-up
+ for (uint32_t n = 0; n < 3; ++n)
+ {
+ auto elapsed = runTrial();
+
+ std::stringstream ss;
+ ss << "Warm-up #" << n << " takes " << elapsed.count() << "ms" << std::endl;
+ setText(env, thisObj, ss);
+ }
+
+ // Measure
+ using namespace boost::accumulators;
+
+ accumulator_set<double, stats<tag::mean, tag::min, tag::max>> acc;
+
+ for (uint32_t n = 0; n < 100; ++n)
+ {
+ auto elapsed = runTrial();
+
+ std::stringstream ss;
+ ss << "Iteration #" << n << " takes " << elapsed.count() << "ms" << std::endl;
+ setText(env, thisObj, ss);
+
+ acc(elapsed.count());
+ }
+
+ std::stringstream ss;
+ ss << "Average is " << mean(acc) << "ms" << std::endl;
+ ss << "Min is " << min(acc) << "ms" << std::endl;
+ ss << "Max is " << max(acc) << "ms" << std::endl;
+ setText(env, thisObj, ss);
+}
+
+JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runInterpreterBenchmark(
+ JNIEnv *env, jobject thisObj, jobject model_buffer)
+{
+ setTitle(env, thisObj, "Running Interpreter Benchmark");
+
+ auto model = loadModel(env, thisObj, model_buffer);
+ assert(model != nullptr);
+
+ nnfw::tflite::BuiltinOpResolver resolver;
+ InterpreterBuilder builder(*model, resolver);
+
+ std::unique_ptr<Interpreter> interpreter;
+
+ TFLITE_ENSURE(builder(&interpreter));
+
+ interpreter->SetNumThreads(-1);
+
+ nnfw::tflite::InterpreterSession sess(interpreter.get());
+ LiteActivity act{sess};
+ runBenchmark(env, thisObj, act);
+}
+
+static void runNNAPIBenchmark(JNIEnv *env, jobject thisObj, jobject model_buffer)
+{
+ auto model = loadModel(env, thisObj, model_buffer);
+ assert(model != nullptr);
+
+ nnfw::tflite::BuiltinOpResolver resolver;
+ InterpreterBuilder builder(*model, resolver);
+
+ std::unique_ptr<Interpreter> interpreter;
+
+ TFLITE_ENSURE(builder(&interpreter));
+
+ nnfw::tflite::NNAPISession sess(interpreter.get());
+ LiteActivity act{sess};
+ runBenchmark(env, thisObj, act);
+}
+
+JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runNNAPIBenchmark(JNIEnv *env,
+ jobject thisObj,
+ jobject model_buffer)
+{
+ setTitle(env, thisObj, "Running NNAPI Benchmark");
+
+ try
+ {
+ runNNAPIBenchmark(env, thisObj, model_buffer);
+ }
+ catch (const std::exception &ex)
+ {
+ std::stringstream ss;
+ ss << "Caught an exception " << ex.what();
+ setText(env, thisObj, ss);
+ }
+}
+
+JNIEXPORT jstring JNICALL Java_com_ndk_tflbench_MainActivity_getModelName(JNIEnv *env,
+ jobject thisObj)
+{
+ return env->NewStringUTF(MODEL_NAME);
+}
+
+#define TF_ENSURE(e) \
+ { \
+ if (!(e).ok()) \
+ { \
+ throw std::runtime_error{"'" #e "' FAILED"}; \
+ } \
+ }
diff --git a/runtime/contrib/android_benchmark_app/cpp/ndk_main.h b/runtime/contrib/android_benchmark_app/cpp/ndk_main.h
new file mode 100644
index 000000000..8de39ce9f
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/cpp/ndk_main.h
@@ -0,0 +1,92 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class com_ndk_tflbench_MainActivity */
+
+#ifndef _Included_com_ndk_tflbench_MainActivity
+#define _Included_com_ndk_tflbench_MainActivity
+#ifdef __cplusplus
+extern "C" {
+#endif
+#undef com_ndk_tflbench_MainActivity_BIND_ABOVE_CLIENT
+#define com_ndk_tflbench_MainActivity_BIND_ABOVE_CLIENT 8L
+#undef com_ndk_tflbench_MainActivity_BIND_ADJUST_WITH_ACTIVITY
+#define com_ndk_tflbench_MainActivity_BIND_ADJUST_WITH_ACTIVITY 128L
+#undef com_ndk_tflbench_MainActivity_BIND_ALLOW_OOM_MANAGEMENT
+#define com_ndk_tflbench_MainActivity_BIND_ALLOW_OOM_MANAGEMENT 16L
+#undef com_ndk_tflbench_MainActivity_BIND_AUTO_CREATE
+#define com_ndk_tflbench_MainActivity_BIND_AUTO_CREATE 1L
+#undef com_ndk_tflbench_MainActivity_BIND_DEBUG_UNBIND
+#define com_ndk_tflbench_MainActivity_BIND_DEBUG_UNBIND 2L
+#undef com_ndk_tflbench_MainActivity_BIND_EXTERNAL_SERVICE
+#define com_ndk_tflbench_MainActivity_BIND_EXTERNAL_SERVICE -2147483648L
+#undef com_ndk_tflbench_MainActivity_BIND_IMPORTANT
+#define com_ndk_tflbench_MainActivity_BIND_IMPORTANT 64L
+#undef com_ndk_tflbench_MainActivity_BIND_NOT_FOREGROUND
+#define com_ndk_tflbench_MainActivity_BIND_NOT_FOREGROUND 4L
+#undef com_ndk_tflbench_MainActivity_BIND_WAIVE_PRIORITY
+#define com_ndk_tflbench_MainActivity_BIND_WAIVE_PRIORITY 32L
+#undef com_ndk_tflbench_MainActivity_CONTEXT_IGNORE_SECURITY
+#define com_ndk_tflbench_MainActivity_CONTEXT_IGNORE_SECURITY 2L
+#undef com_ndk_tflbench_MainActivity_CONTEXT_INCLUDE_CODE
+#define com_ndk_tflbench_MainActivity_CONTEXT_INCLUDE_CODE 1L
+#undef com_ndk_tflbench_MainActivity_CONTEXT_RESTRICTED
+#define com_ndk_tflbench_MainActivity_CONTEXT_RESTRICTED 4L
+#undef com_ndk_tflbench_MainActivity_MODE_APPEND
+#define com_ndk_tflbench_MainActivity_MODE_APPEND 32768L
+#undef com_ndk_tflbench_MainActivity_MODE_ENABLE_WRITE_AHEAD_LOGGING
+#define com_ndk_tflbench_MainActivity_MODE_ENABLE_WRITE_AHEAD_LOGGING 8L
+#undef com_ndk_tflbench_MainActivity_MODE_MULTI_PROCESS
+#define com_ndk_tflbench_MainActivity_MODE_MULTI_PROCESS 4L
+#undef com_ndk_tflbench_MainActivity_MODE_NO_LOCALIZED_COLLATORS
+#define com_ndk_tflbench_MainActivity_MODE_NO_LOCALIZED_COLLATORS 16L
+#undef com_ndk_tflbench_MainActivity_MODE_PRIVATE
+#define com_ndk_tflbench_MainActivity_MODE_PRIVATE 0L
+#undef com_ndk_tflbench_MainActivity_MODE_WORLD_READABLE
+#define com_ndk_tflbench_MainActivity_MODE_WORLD_READABLE 1L
+#undef com_ndk_tflbench_MainActivity_MODE_WORLD_WRITEABLE
+#define com_ndk_tflbench_MainActivity_MODE_WORLD_WRITEABLE 2L
+#undef com_ndk_tflbench_MainActivity_RECEIVER_VISIBLE_TO_INSTANT_APPS
+#define com_ndk_tflbench_MainActivity_RECEIVER_VISIBLE_TO_INSTANT_APPS 1L
+#undef com_ndk_tflbench_MainActivity_DEFAULT_KEYS_DIALER
+#define com_ndk_tflbench_MainActivity_DEFAULT_KEYS_DIALER 1L
+#undef com_ndk_tflbench_MainActivity_DEFAULT_KEYS_DISABLE
+#define com_ndk_tflbench_MainActivity_DEFAULT_KEYS_DISABLE 0L
+#undef com_ndk_tflbench_MainActivity_DEFAULT_KEYS_SEARCH_GLOBAL
+#define com_ndk_tflbench_MainActivity_DEFAULT_KEYS_SEARCH_GLOBAL 4L
+#undef com_ndk_tflbench_MainActivity_DEFAULT_KEYS_SEARCH_LOCAL
+#define com_ndk_tflbench_MainActivity_DEFAULT_KEYS_SEARCH_LOCAL 3L
+#undef com_ndk_tflbench_MainActivity_DEFAULT_KEYS_SHORTCUT
+#define com_ndk_tflbench_MainActivity_DEFAULT_KEYS_SHORTCUT 2L
+#undef com_ndk_tflbench_MainActivity_RESULT_CANCELED
+#define com_ndk_tflbench_MainActivity_RESULT_CANCELED 0L
+#undef com_ndk_tflbench_MainActivity_RESULT_FIRST_USER
+#define com_ndk_tflbench_MainActivity_RESULT_FIRST_USER 1L
+#undef com_ndk_tflbench_MainActivity_RESULT_OK
+#define com_ndk_tflbench_MainActivity_RESULT_OK -1L
+/*
+ * Class: com_ndk_tflbench_MainActivity
+ * Method: getModelName
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_com_ndk_tflbench_MainActivity_getModelName(JNIEnv *, jobject);
+
+/*
+ * Class: com_ndk_tflbench_MainActivity
+ * Method: runInterpreterBenchmark
+ * Signature: (Ljava/nio/MappedByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runInterpreterBenchmark(JNIEnv *, jobject,
+ jobject);
+
+/*
+ * Class: com_ndk_tflbench_MainActivity
+ * Method: runNNAPIBenchmark
+ * Signature: (Ljava/nio/MappedByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_com_ndk_tflbench_MainActivity_runNNAPIBenchmark(JNIEnv *, jobject,
+ jobject);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/runtime/contrib/android_benchmark_app/java/com/ndk/tflbench/MainActivity.java b/runtime/contrib/android_benchmark_app/java/com/ndk/tflbench/MainActivity.java
new file mode 100644
index 000000000..14bf239a6
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/java/com/ndk/tflbench/MainActivity.java
@@ -0,0 +1,110 @@
+package com.ndk.tflbench;
+
+import android.app.Activity;
+import android.os.Bundle;
+import android.content.Intent;
+import android.view.View;
+import android.view.Menu;
+import android.view.MenuItem;
+import android.widget.TextView;
+import android.widget.Button;
+import android.net.Uri;
+import android.content.res.AssetFileDescriptor;
+import android.content.res.AssetManager;
+import android.graphics.Bitmap;
+import android.os.SystemClock;
+import android.os.Trace;
+import android.util.Log;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Vector;
+
+public class MainActivity extends Activity {
+
+ static {
+ System.loadLibrary("android_benchmark_native");
+ }
+
+ private void setModel(final String message) {
+ final TextView textView = (TextView)findViewById(R.id.model_label);
+ runOnUiThread(new Runnable() {
+ @Override
+ public void run() { textView.setText(message); }
+ });
+ }
+
+ private void setTitle(final String message) {
+ final TextView textView = (TextView)findViewById(R.id.title_label);
+ runOnUiThread(new Runnable() {
+ @Override
+ public void run() { textView.setText(message); }
+ });
+ }
+
+ private void setText(final String message) {
+ final TextView textView = (TextView)findViewById(R.id.message_label);
+ runOnUiThread(new Runnable() {
+ @Override
+ public void run() { textView.setText(message); }
+ });
+ }
+
+ private MappedByteBuffer buffer;
+
+ @Override
+ protected void onCreate(Bundle savedInstanceState) {
+ super.onCreate(savedInstanceState);
+ setContentView(R.layout.activity_main);
+
+ setModel(getModelName());
+
+ // Load Tensorflow Lite model
+ try
+ {
+ AssetManager assets = getAssets();
+ AssetFileDescriptor fileDescriptor = assets.openFd("model.tflite");
+ FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
+ FileChannel fileChannel = inputStream.getChannel();
+ final long startOffset = fileDescriptor.getStartOffset();
+ final long declaredLength = fileDescriptor.getDeclaredLength();
+
+ buffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
+ } catch (IOException e) {
+ Log.e("MYAPP", "exception", e);
+ }
+
+ Button btn_interp = (Button)findViewById(R.id.button_interp);
+ btn_interp.setOnClickListener(new Button.OnClickListener() {
+ @Override public void onClick(View view) {
+ new Thread(new Runnable() {
+ @Override
+ public void run() { runInterpreterBenchmark(buffer); }
+ }).start();
+ }
+ });
+
+ Button btn_nnapi = (Button)findViewById(R.id.button_nnapi);
+ btn_nnapi.setOnClickListener(new Button.OnClickListener() {
+ @Override public void onClick(View view) {
+ new Thread(new Runnable() {
+ @Override
+ public void run() { runNNAPIBenchmark(buffer); }
+ }).start();
+ }
+ });
+ }
+
+ public native String getModelName();
+ public native void runInterpreterBenchmark(MappedByteBuffer buffer);
+ public native void runNNAPIBenchmark(MappedByteBuffer buffer);
+}
diff --git a/runtime/contrib/android_benchmark_app/res/drawable-hdpi/ic_launcher.png b/runtime/contrib/android_benchmark_app/res/drawable-hdpi/ic_launcher.png
new file mode 100644
index 000000000..96a442e5b
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/drawable-hdpi/ic_launcher.png
Binary files differ
diff --git a/runtime/contrib/android_benchmark_app/res/drawable-mdpi/ic_launcher.png b/runtime/contrib/android_benchmark_app/res/drawable-mdpi/ic_launcher.png
new file mode 100644
index 000000000..359047dfa
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/drawable-mdpi/ic_launcher.png
Binary files differ
diff --git a/runtime/contrib/android_benchmark_app/res/drawable-xhdpi/ic_launcher.png b/runtime/contrib/android_benchmark_app/res/drawable-xhdpi/ic_launcher.png
new file mode 100644
index 000000000..71c6d760f
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/drawable-xhdpi/ic_launcher.png
Binary files differ
diff --git a/runtime/contrib/android_benchmark_app/res/drawable-xxhdpi/ic_launcher.png b/runtime/contrib/android_benchmark_app/res/drawable-xxhdpi/ic_launcher.png
new file mode 100644
index 000000000..4df189464
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/drawable-xxhdpi/ic_launcher.png
Binary files differ
diff --git a/runtime/contrib/android_benchmark_app/res/layout/activity_main.xml b/runtime/contrib/android_benchmark_app/res/layout/activity_main.xml
new file mode 100644
index 000000000..551952582
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/layout/activity_main.xml
@@ -0,0 +1,38 @@
+<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+ xmlns:tools="http://schemas.android.com/tools"
+ android:layout_width="match_parent"
+ android:layout_height="match_parent"
+ android:orientation="vertical">
+
+ <Button android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:id="@+id/button_interp"
+ android:text="Run Interp benchmark"/>
+
+ <Button android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:id="@+id/button_nnapi"
+ android:text="Run NNAPI benchmark"/>
+
+ <TextView android:text=""
+ android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:id="@+id/model_label"
+ android:layout_centerHorizontal="true"
+ android:textSize="16dp"/>
+
+ <TextView android:text=""
+ android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:id="@+id/title_label"
+ android:layout_centerHorizontal="true"
+ android:textSize="16dp"/>
+
+ <TextView android:text=""
+ android:layout_width="match_parent"
+ android:layout_height="wrap_content"
+ android:id="@+id/message_label"
+ android:layout_centerHorizontal="true"
+ android:textSize="16dp"/>
+
+</LinearLayout>
diff --git a/runtime/contrib/android_benchmark_app/res/values-v21/styles.xml b/runtime/contrib/android_benchmark_app/res/values-v21/styles.xml
new file mode 100644
index 000000000..dba3c417b
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/values-v21/styles.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+ <style name="AppTheme" parent="android:Theme.Material.Light">
+ </style>
+</resources>
diff --git a/runtime/contrib/android_benchmark_app/res/values/strings.xml b/runtime/contrib/android_benchmark_app/res/values/strings.xml
new file mode 100644
index 000000000..3c6c4aaa4
--- /dev/null
+++ b/runtime/contrib/android_benchmark_app/res/values/strings.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+
+ <string name="app_name">T/F Lite Benchmark</string>
+
+</resources>
diff --git a/runtime/contrib/android_tflite/CMakeLists.txt b/runtime/contrib/android_tflite/CMakeLists.txt
new file mode 100644
index 000000000..b65a7c525
--- /dev/null
+++ b/runtime/contrib/android_tflite/CMakeLists.txt
@@ -0,0 +1,31 @@
+if(NOT BUILD_ANDROID_TFLITE)
+ return()
+endif(NOT BUILD_ANDROID_TFLITE)
+
+nnfw_find_package(TensorFlowLite REQUIRED)
+
+if(NOT DEFINED NDK_DIR)
+ message(FATAL_ERROR "NDK_DIR should be specified via environment variable")
+endif()
+message(STATUS "Found NDK: ${NDK_DIR}")
+
+#
+# Tensorflow Lite JNI library
+#
+set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite")
+set(TFLITE_JNI_BASE ${TENSORFLOW_LITE_BASE}/java/src/main/native)
+set(TFLITE_JNI_SRCS ${TFLITE_JNI_BASE}/exception_jni.cc
+ ${TFLITE_JNI_BASE}/nativeinterpreterwrapper_jni.cc
+ ${TFLITE_JNI_BASE}/tensor_jni.cc
+ ${TFLITE_JNI_BASE}/tensorflow_lite_jni.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/builtin_ops_jni.cc # Use nnfw's OpResolver
+ )
+set(TFLITE_JNI_INCLUDES ${TENSORFLOW_LITE_BASE}/java/src/native)
+
+# TODO use tensorflow-lite static library instead of compiling all the sources again
+add_library(tensorflowlite_jni SHARED ${TFLITE_JNI_SRCS} ${TFLITE_SRCS})
+target_include_directories(tensorflowlite_jni PUBLIC ${TFLITE_JNI_INCLUDES} ${TFLITE_INCLUDES})
+target_link_libraries(tensorflowlite_jni eigen ${LIB_PTHREAD} dl)
+target_link_libraries(tensorflowlite_jni log)
+target_link_libraries(tensorflowlite_jni nnfw_lib_tflite)
+install(TARGETS tensorflowlite_jni DESTINATION lib)
diff --git a/runtime/contrib/android_tflite/builtin_ops_jni.cc b/runtime/contrib/android_tflite/builtin_ops_jni.cc
new file mode 100644
index 000000000..af9d3325d
--- /dev/null
+++ b/runtime/contrib/android_tflite/builtin_ops_jni.cc
@@ -0,0 +1,29 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ Copyright 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/register.h"
+#include "tflite/ext/kernels/register.h"
+
+namespace tflite
+{
+
+std::unique_ptr<OpResolver> CreateOpResolver()
+{
+ return std::unique_ptr<::nnfw::tflite::BuiltinOpResolver>(
+ new ::nnfw::tflite::BuiltinOpResolver());
+}
+
+} // namespace tflite
diff --git a/runtime/contrib/benchmark_acl/.FORMATDENY b/runtime/contrib/benchmark_acl/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/.FORMATDENY
diff --git a/runtime/contrib/benchmark_acl/CMakeLists.txt b/runtime/contrib/benchmark_acl/CMakeLists.txt
new file mode 100644
index 000000000..528db4142
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/CMakeLists.txt
@@ -0,0 +1,24 @@
+if(NOT BUILD_BENCHMARK_ACL)
+ return()
+endif(NOT BUILD_BENCHMARK_ACL)
+
+nnas_find_package(ARMCompute REQUIRED)
+
+add_library(arm_compute_benchmark SHARED "src/Benchmark.cpp")
+target_include_directories(arm_compute_benchmark PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(arm_compute_benchmark arm_compute_graph)
+install(TARGETS arm_compute_benchmark DESTINATION lib)
+
+# GoogLeNet benchmark
+add_executable(benchmark_googlenet "src/benchmark_googlenet.cpp")
+target_link_libraries(benchmark_googlenet arm_compute_benchmark)
+
+# Inception V3 benchmark
+add_executable(benchmark_inception_v3 "src/benchmark_inception_v3.cpp")
+target_link_libraries(benchmark_inception_v3 arm_compute_benchmark)
+
+# MobileNet benchmark
+add_executable(benchmark_mobilenet "src/benchmark_mobilenet.cpp")
+target_link_libraries(benchmark_mobilenet arm_compute_benchmark)
+
+install(TARGETS benchmark_googlenet benchmark_inception_v3 benchmark_mobilenet DESTINATION bin)
diff --git a/runtime/contrib/benchmark_acl/src/Benchmark.cpp b/runtime/contrib/benchmark_acl/src/Benchmark.cpp
new file mode 100644
index 000000000..4a761ec76
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/src/Benchmark.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Benchmark.h"
+
+#include <cstdlib>
+
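+// Count reads the number of measured iterations from the COUNT environment
+// variable, defaulting to 1 (e.g. run as: COUNT=100 ./benchmark_mobilenet 0).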
+Count::Count() : _value(1)
+{
+ auto env = std::getenv("COUNT");
+
+ if (env)
+ {
+ _value = std::strtol(env, NULL, 0);
+ }
+}
+
+uint32_t Count::value(void) const { return _value; }
+
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/mean.hpp>
+
+#include <iostream>
+#include <chrono>
+
+using namespace boost::accumulators;
+
+void run_benchmark(arm_compute::graph::frontend::Stream &graph)
+{
+ // NOTE Here the number of warming-up iterations is hardcoded
+ // TODO Decide the number of warming-up iterations appropriately
+ for (uint32_t n = 0; n < 3; ++n)
+ {
+ auto beg = std::chrono::steady_clock::now();
+ graph.run();
+ auto end = std::chrono::steady_clock::now();
+ auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg);
+
+ std::cout << "Warming-up " << n << ": " << elapsed.count() << "ms" << std::endl;
+ }
+
+ accumulator_set<double, stats<tag::mean>> acc;
+
+ const Count count;
+
+ for (uint32_t n = 0; n < count.value(); ++n)
+ {
+ auto beg = std::chrono::steady_clock::now();
+ graph.run();
+ auto end = std::chrono::steady_clock::now();
+ auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - beg);
+
+ std::cout << "Iteration " << n << ": " << elapsed.count() << "ms" << std::endl;
+
+ acc(elapsed.count());
+ }
+
+ std::cout << "--------" << std::endl;
+ std::cout << "Mean: " << mean(acc) << "ms" << std::endl;
+}
diff --git a/runtime/contrib/benchmark_acl/src/Benchmark.h b/runtime/contrib/benchmark_acl/src/Benchmark.h
new file mode 100644
index 000000000..200f40952
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/src/Benchmark.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ACL_BENCHMARK_H__
+#define __ACL_BENCHMARK_H__
+
+#include "arm_compute/graph/ITensorAccessor.h"
+#include "arm_compute/graph.h"
+#include "arm_compute/core/CL/OpenCL.h"
+
+struct InputAccessor final : public arm_compute::graph::ITensorAccessor
+{
+ InputAccessor() = default;
+ /** Allows instances to be move constructed */
+ InputAccessor(InputAccessor &&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor &tensor) override
+ {
+ return true;
+ }
+};
+
+struct OutputAccessor final : public arm_compute::graph::ITensorAccessor
+{
+ OutputAccessor() = default;
+ /** Allows instances to be move constructed */
+ OutputAccessor(OutputAccessor &&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor &tensor) override
+ {
+ return false;
+ }
+};
+
+template <typename T> std::unique_ptr<arm_compute::graph::ITensorAccessor> get_accessor()
+{
+ return std::unique_ptr<T>(new T());
+}
+
+class Count
+{
+public:
+ Count();
+
+public:
+ uint32_t value(void) const;
+
+private:
+ uint32_t _value;
+};
+
+inline arm_compute::graph::Target set_target_hint(int target)
+{
+ if(target == 1 && arm_compute::opencl_is_available())
+ {
+ // Target 1 requests OpenCL; the condition above has already verified that OpenCL is available
+ return arm_compute::graph::Target::CL;
+ }
+ else
+ {
+ return arm_compute::graph::Target::NEON;
+ }
+}
+
+void run_benchmark(arm_compute::graph::frontend::Stream &graph);
+
+#endif
diff --git a/runtime/contrib/benchmark_acl/src/benchmark_googlenet.cpp b/runtime/contrib/benchmark_acl/src/benchmark_googlenet.cpp
new file mode 100644
index 000000000..8b0fbfdac
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/src/benchmark_googlenet.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph.h"
+
+#include "Benchmark.h"
+
+#include <cstdlib>
+#include <tuple>
+
+using namespace arm_compute::graph::frontend;
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_input_accessor(void)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_random_accessor(float lower, float upper)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_weights_accessor(const std::string &path, const std::string &data_file, DataLayout file_layout = DataLayout::NCHW)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_output_accessor(void)
+{
+ return get_accessor<OutputAccessor>();
+}
+
+/** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ */
+class GraphGooglenetExample
+{
+public:
+ void do_setup(int argc, char **argv)
+ {
+ std::string data_path; /* Path to the trainable data */
+ std::string image; /* Image data */
+ std::string label; /* Label data */
+
+ const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
+ // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
+ const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+ Target target_hint = set_target_hint(target);
+ FastMathHint fast_math_hint = FastMathHint::Disabled;
+
+ // Parse arguments
+ if(argc < 2)
+ {
+ // Print help
+ std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 2)
+ {
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 3)
+ {
+ data_path = argv[2];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No image provided: using random values\n\n";
+ }
+ else if(argc == 4)
+ {
+ data_path = argv[2];
+ image = argv[3];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
+ std::cout << "No text file with labels provided: skipping output accessor\n\n";
+ }
+ else if(argc == 5)
+ {
+ data_path = argv[2];
+ image = argv[3];
+ label = argv[4];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+ std::cout << "No fast math info provided: disabling fast math\n\n";
+ }
+ else
+ {
+ data_path = argv[2];
+ image = argv[3];
+ label = argv[4];
+ fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ? FastMathHint::Disabled : FastMathHint::Enabled;
+ }
+
+ graph << target_hint
+ << fast_math_hint
+ << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
+ get_input_accessor())
+ << ConvolutionLayer(
+ 7U, 7U, 64U,
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"),
+ PadStrideInfo(2, 2, 3, 3))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
+ << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
+ << ConvolutionLayer(
+ 1U, 1U, 64U,
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << ConvolutionLayer(
+ 3U, 3U, 192U,
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"),
+ PadStrideInfo(1, 1, 1, 1))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
+ graph << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U);
+ graph << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U);
+ graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
+ graph << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U);
+ graph << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U);
+ graph << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U);
+ graph << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U);
+ graph << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+ graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
+ graph << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+ graph << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U);
+ graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)))
+ << FullyConnectedLayer(
+ 1000U,
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy"),
+ get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy"))
+ << SoftmaxLayer()
+ << OutputLayer(get_output_accessor());
+
+ // Finalize graph
+ GraphConfig config;
+ config.use_tuner = (target == 2);
+ graph.finalize(target_hint, config);
+ }
+ void do_run()
+ {
+ run_benchmark(graph);
+ }
+
+private:
+ Stream graph{ 0, "GoogleNet" };
+
+ ConcatLayer get_inception_node(const std::string &data_path, std::string &&param_path,
+ unsigned int a_filt,
+ std::tuple<unsigned int, unsigned int> b_filters,
+ std::tuple<unsigned int, unsigned int> c_filters,
+ unsigned int d_filt)
+ {
+ std::string total_path = "/cnn_data/googlenet_model/" + param_path + "/" + param_path + "_";
+ SubStream i_a(graph);
+ i_a << ConvolutionLayer(
+ 1U, 1U, a_filt,
+ get_weights_accessor(data_path, total_path + "1x1_w.npy"),
+ get_weights_accessor(data_path, total_path + "1x1_b.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ SubStream i_b(graph);
+ i_b << ConvolutionLayer(
+ 1U, 1U, std::get<0>(b_filters),
+ get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy"),
+ get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << ConvolutionLayer(
+ 3U, 3U, std::get<1>(b_filters),
+ get_weights_accessor(data_path, total_path + "3x3_w.npy"),
+ get_weights_accessor(data_path, total_path + "3x3_b.npy"),
+ PadStrideInfo(1, 1, 1, 1))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ SubStream i_c(graph);
+ i_c << ConvolutionLayer(
+ 1U, 1U, std::get<0>(c_filters),
+ get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy"),
+ get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+ << ConvolutionLayer(
+ 5U, 5U, std::get<1>(c_filters),
+ get_weights_accessor(data_path, total_path + "5x5_w.npy"),
+ get_weights_accessor(data_path, total_path + "5x5_b.npy"),
+ PadStrideInfo(1, 1, 2, 2))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ SubStream i_d(graph);
+ i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)))
+ << ConvolutionLayer(
+ 1U, 1U, d_filt,
+ get_weights_accessor(data_path, total_path + "pool_proj_w.npy"),
+ get_weights_accessor(data_path, total_path + "pool_proj_b.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
+
+ return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
+ }
+};
+
+/** Main program for Googlenet
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ */
+int main(int argc, char **argv)
+{
+ GraphGooglenetExample example;
+
+ example.do_setup(argc, argv);
+ example.do_run();
+
+ return 0;
+}
diff --git a/runtime/contrib/benchmark_acl/src/benchmark_inception_v3.cpp b/runtime/contrib/benchmark_acl/src/benchmark_inception_v3.cpp
new file mode 100644
index 000000000..382851f50
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/src/benchmark_inception_v3.cpp
@@ -0,0 +1,891 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph.h"
+
+#include "Benchmark.h"
+
+#include <cstdlib>
+#include <tuple>
+
+using namespace arm_compute::graph::frontend;
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_input_accessor(void)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_random_accessor(float lower, float upper)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_weights_accessor(const std::string &path, const std::string &data_file, DataLayout file_layout = DataLayout::NCHW)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_output_accessor(void)
+{
+ return get_accessor<OutputAccessor>();
+}
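+
+// Note: for benchmarking purposes all four accessors above are stubs from
+// Benchmark.h. Their arguments (bounds, .npy paths, layouts) are ignored and
+// every call returns the same synthetic-data accessor, so no weight files or
+// images are actually read; the paths below only mirror the upstream
+// Compute Library example.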
+
+/** Example demonstrating how to implement InceptionV3's network using the Compute Library's graph API
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ */
+class InceptionV3Example
+{
+public:
+ void do_setup(int argc, char **argv)
+ {
+ std::string data_path; /* Path to the trainable data */
+ std::string image; /* Image data */
+ std::string label; /* Label data */
+
+ // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
+ const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+ Target target_hint = set_target_hint(target);
+ FastMathHint fast_math_hint = FastMathHint::Disabled;
+
+ // Parse arguments
+ if(argc < 2)
+ {
+ // Print help
+ std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 2)
+ {
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 3)
+ {
+ data_path = argv[2];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No image provided: using random values\n\n";
+ }
+ else if(argc == 4)
+ {
+ data_path = argv[2];
+ image = argv[3];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
+ std::cout << "No text file with labels provided: skipping output accessor\n\n";
+ }
+ else if(argc == 5)
+ {
+ data_path = argv[2];
+ image = argv[3];
+ label = argv[4];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+ std::cout << "No fast math info provided: disabling fast math\n\n";
+ }
+ else
+ {
+ data_path = argv[2];
+ image = argv[3];
+ label = argv[4];
+      fast_math_hint = (std::strtol(argv[5], nullptr, 10) == 0) ? FastMathHint::Disabled : FastMathHint::Enabled; // base must be 10; base 1 is invalid for strtol
+ }
+
+ graph << target_hint
+ << fast_math_hint
+ << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
+ get_input_accessor())
+ << ConvolutionLayer(3U, 3U, 32U,
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
+ .set_name("Conv2d_1a_3x3/convolution")
+ << BatchNormalizationLayer(get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f), get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu")
+ << ConvolutionLayer(3U, 3U, 32U,
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
+ .set_name("Conv2d_2a_3x3/convolution")
+ << BatchNormalizationLayer(get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f), get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name("Conv2d_2a_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu")
+
+ << ConvolutionLayer(3U, 3U, 64U,
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
+ .set_name("Conv2d_2b_3x3/convolution")
+ << BatchNormalizationLayer(get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f), get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name("Conv2d_2b_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu")
+
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool")
+
+ << ConvolutionLayer(1U, 1U, 80U,
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
+ .set_name("Conv2d_3b_1x1/convolution")
+ << BatchNormalizationLayer(get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f), get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name("Conv2d_3b_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu")
+
+ << ConvolutionLayer(3U, 3U, 192U,
+ get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
+ .set_name("Conv2d_4a_3x3/convolution")
+ << BatchNormalizationLayer(get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f), get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name("Conv2d_4a_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu")
+
+ << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool");
+
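+    // Inception blocks. Node types A-E below correspond to the Mixed_5*/6*/7*
+    // modules of the TF-Slim InceptionV3 graph: A builds the 5x5 modules,
+    // B and D are grid-size reductions, C uses factorized 7x7 convolutions,
+    // and E is the expanded filter-bank module.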
+ graph << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+ 32U)
+ .set_name("Mixed_5b/concat");
+ graph << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+ 64U, true)
+ .set_name("Mixed_5c/concat");
+ graph << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+ 64U)
+ .set_name("Mixed_5d/concat");
+
+ graph << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
+
+ graph << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U),
+ std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U)
+ .set_name("Mixed_6b/concat");
+ graph << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U),
+ std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
+ .set_name("Mixed_6c/concat");
+ graph << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U),
+ std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
+ .set_name("Mixed_6d/concat");
+ graph << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U),
+ std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U)
+ .set_name("Mixed_6e/concat");
+
+ graph << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U),
+ std::make_tuple(192U, 192U, 192U, 192U))
+ .set_name("Mixed_7a/concat");
+
+ graph << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U),
+ std::make_tuple(448U, 384U, 384U, 384U), 192U)
+ .set_name("Mixed_7b/concat");
+ graph << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U),
+ std::make_tuple(448U, 384U, 384U, 384U), 192U, true)
+ .set_name("Mixed_7c/concat");
+
+ graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool")
+ << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy"),
+ get_weights_accessor(data_path,
+ "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name("Logits/Conv2d_1c_1x1/convolution")
+ << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape")
+ << SoftmaxLayer().set_name("Predictions/Softmax")
+ << OutputLayer(get_output_accessor());
+
+ // Finalize graph
+ GraphConfig config;
+ config.use_tuner = (target == 2);
+ graph.finalize(target_hint, config);
+ }
+
+ void do_run()
+ {
+ run_benchmark(graph);
+ }
+
+private:
+ Stream graph{ 0, "InceptionV3" };
+
+private:
+ ConcatLayer get_inception_node_A(const std::string &data_path, std::string &&param_path,
+ unsigned int a_filt,
+ std::tuple<unsigned int, unsigned int> b_filters,
+ std::tuple<unsigned int, unsigned int, unsigned int> c_filters,
+ unsigned int d_filt,
+ bool is_name_different = false)
+ {
+ std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
+
+ // This is due to a naming issue in the tf model
+ std::string conv_id0 = "_0a_";
+ std::string conv_id1 = "2d_0b_";
+ if(is_name_different)
+ {
+ conv_id0 = "_0b_";
+ conv_id1 = "_1_0c_";
+ }
+
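+    // Four parallel branches: a 1x1 convolution, a 1x1 reduce followed by a
+    // 5x5 convolution, a 1x1 reduce followed by two 3x3 convolutions, and a
+    // 3x3 average pool followed by a 1x1 projection. Each convolution is
+    // followed by batch normalization and a ReLU.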
+ SubStream i_a(graph);
+ i_a << ConvolutionLayer(
+ 1U, 1U, a_filt,
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu");
+
+ SubStream i_b(graph);
+ i_b << ConvolutionLayer(
+ 1U, 1U, std::get<0>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu")
+ << ConvolutionLayer(
+ 5U, 5U, std::get<1>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 2, 2))
+ .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/Relu");
+
+ SubStream i_c(graph);
+ i_c << ConvolutionLayer(
+ 1U, 1U, std::get<0>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<1>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 1))
+ .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<2>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 1))
+ .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm/batcnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu");
+
+ SubStream i_d(graph);
+ i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
+ << ConvolutionLayer(
+ 1U, 1U, d_filt,
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu");
+
+ return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
+ }
+
+ ConcatLayer get_inception_node_B(const std::string &data_path, std::string &&param_path,
+ unsigned int a_filt,
+ std::tuple<unsigned int, unsigned int, unsigned int> b_filters)
+ {
+ std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
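+    // Grid-size reduction block: a stride-2 3x3 convolution, a
+    // 1x1 -> 3x3 -> stride-2 3x3 chain, and a stride-2 max-pool run in
+    // parallel; concatenating them halves the spatial size while growing the
+    // channel count.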
+ SubStream i_a(graph);
+ i_a << ConvolutionLayer(
+ 3U, 3U, a_filt,
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(2, 2, 0, 0))
+ .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_1x1/Relu");
+
+ SubStream i_b(graph);
+ i_b << ConvolutionLayer(
+ 1U, 1U, std::get<0>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<1>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 1))
+ .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<2>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(2, 2, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_1x1/Relu");
+
+ SubStream i_c(graph);
+ i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool");
+
+ return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c));
+ }
+
+ ConcatLayer get_inception_node_C(const std::string &data_path, std::string &&param_path,
+ unsigned int a_filt,
+ std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
+ std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
+ unsigned int d_filt)
+ {
+ std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
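+    // Factorized 7x7 block: each 7x7 convolution is decomposed into 1x7 and
+    // 7x1 pairs. ConvolutionLayer takes (kernel_width, kernel_height), so
+    // 7U, 1U with padding (3, 0) is the 1x7 kernel and 1U, 7U with padding
+    // (0, 3) is the 7x1 kernel.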
+ SubStream i_a(graph);
+ i_a << ConvolutionLayer(
+ 1U, 1U, a_filt,
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu");
+
+ SubStream i_b(graph);
+ i_b << ConvolutionLayer(
+ 1U, 1U, std::get<0>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 7U, 1U, std::get<1>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 3, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
+ << ConvolutionLayer(
+ 1U, 7U, std::get<2>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 3))
+ .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm")
+      << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu");
+
+ SubStream i_c(graph);
+ i_c << ConvolutionLayer(
+ 1U, 1U, std::get<0>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 1U, 7U, std::get<1>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 3))
+ .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu")
+ << ConvolutionLayer(
+ 7U, 1U, std::get<2>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 3, 0))
+ .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu")
+ << ConvolutionLayer(
+ 1U, 7U, std::get<3>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 3))
+ .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu")
+ << ConvolutionLayer(
+ 7U, 1U, std::get<4>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 3, 0))
+ .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu");
+
+ SubStream i_d(graph);
+ i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
+ << ConvolutionLayer(
+ 1U, 1U, d_filt,
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu");
+
+ return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
+ }
+
+ ConcatLayer get_inception_node_D(const std::string &data_path, std::string &&param_path,
+ std::tuple<unsigned int, unsigned int> a_filters,
+ std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters)
+ {
+ std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
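+    // Second grid-size reduction (17x17 -> 8x8): both convolution branches
+    // end in a stride-2 3x3 convolution, and the third branch is a stride-2
+    // max-pool.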
+ SubStream i_a(graph);
+ i_a << ConvolutionLayer(
+ 1U, 1U, std::get<0>(a_filters),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<1>(a_filters),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(2, 2, 0, 0))
+ .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_3x3/Relu");
+
+ SubStream i_b(graph);
+ i_b << ConvolutionLayer(
+ 1U, 1U, std::get<0>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 7U, 1U, std::get<1>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 3, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
+ << ConvolutionLayer(
+ 1U, 7U, std::get<2>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 3))
+ .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<3>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(2, 2, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_3x3/Relu");
+
+ SubStream i_c(graph);
+ i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool");
+
+ return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c));
+ }
+
+ ConcatLayer get_inception_node_E(const std::string &data_path, std::string &&param_path,
+ unsigned int a_filt,
+ std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
+ std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
+ unsigned int d_filt,
+ bool is_name_different = false)
+ {
+ // This is due to a naming issue in the tf model
+ std::string conv_id = "_0b_";
+ if(is_name_different)
+ {
+ conv_id = "_0c_";
+ }
+
+ std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
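+    // Expanded filter-bank block: branches 1 and 2 each fork into parallel
+    // 1x3 and 3x1 convolutions (SubStreams i_b1/i_b2 and i_c1/i_c2 below)
+    // that are merged back with a ConcatLayer before the final four-way
+    // concatenation.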
+ SubStream i_a(graph);
+ i_a << ConvolutionLayer(
+ 1U, 1U, a_filt,
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu");
+
+ SubStream i_b(graph);
+ i_b << ConvolutionLayer(
+ 1U, 1U, std::get<0>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu");
+
+ SubStream i_b1(static_cast<IStream &>(i_b));
+ i_b1 << ConvolutionLayer(
+ 3U, 1U, std::get<1>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 0))
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu");
+
+ SubStream i_b2(static_cast<IStream &>(i_b));
+ i_b2 << ConvolutionLayer(
+ 1U, 3U, std::get<2>(b_filters),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 1))
+ .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/Relu");
+
+ // Merge b1 and b2
+ i_b << ConcatLayer(std::move(i_b1), std::move(i_b2)).set_name(param_path + "/Branch_1/concat");
+
+ SubStream i_c(graph);
+ i_c << ConvolutionLayer(
+ 1U, 1U, std::get<0>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
+ << ConvolutionLayer(
+ 3U, 3U, std::get<1>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 1))
+ .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu");
+
+ SubStream i_c1(static_cast<IStream &>(i_c));
+ i_c1 << ConvolutionLayer(
+ 3U, 1U, std::get<2>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 1, 0))
+ .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu");
+
+ SubStream i_c2(static_cast<IStream &>(i_c));
+ i_c2 << ConvolutionLayer(
+ 1U, 3U, std::get<3>(c_filters),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 1))
+ .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_3x1/Relu");
+
+ // Merge i_c1 and i_c2
+ i_c << ConcatLayer(std::move(i_c1), std::move(i_c2)).set_name(param_path + "/Branch_2/concat");
+
+ SubStream i_d(graph);
+ i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
+ << ConvolutionLayer(
+ 1U, 1U, d_filt,
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
+ get_random_accessor(1.f, 1.f),
+ get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu");
+
+ return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
+ }
+};
+
+/** Main program for Inception V3
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ */
+int main(int argc, char **argv)
+{
+ InceptionV3Example example;
+
+ example.do_setup(argc, argv);
+ example.do_run();
+
+ return 0;
+}
diff --git a/runtime/contrib/benchmark_acl/src/benchmark_mobilenet.cpp b/runtime/contrib/benchmark_acl/src/benchmark_mobilenet.cpp
new file mode 100644
index 000000000..085be184e
--- /dev/null
+++ b/runtime/contrib/benchmark_acl/src/benchmark_mobilenet.cpp
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph.h"
+
+#include "Benchmark.h"
+
+#include <cstdlib>
+
+using namespace arm_compute::graph::frontend;
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_input_accessor(void)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_random_accessor(float lower, float upper)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_weights_accessor(const std::string &path, const std::string &data_file, DataLayout file_layout = DataLayout::NCHW)
+{
+ return get_accessor<InputAccessor>();
+}
+
+inline std::unique_ptr<arm_compute::graph::ITensorAccessor> get_output_accessor(void)
+{
+ return get_accessor<OutputAccessor>();
+}
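+
+// Same stub accessors as in the InceptionV3 benchmark: the arguments are
+// ignored and no weight files or images are read.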
+
+/** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL), [optional] Path to the weights folder, [optional] image, [optional] labels )
+ */
+class GraphMobilenetExample
+{
+public:
+ void do_setup(int argc, char **argv)
+ {
+ std::string data_path; /* Path to the trainable data */
+ std::string image; /* Image data */
+ std::string label; /* Label data */
+
+ // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
+ const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
+ Target target_hint = set_target_hint(target);
+ ConvolutionMethod convolution_hint = ConvolutionMethod::GEMM;
+ DepthwiseConvolutionMethod depthwise_convolution_hint = DepthwiseConvolutionMethod::Optimized3x3;
+ FastMathHint fast_math_hint = FastMathHint::Disabled;
+
+ // Set model to execute. 0 (MobileNetV1_1.0_224), 1 (MobileNetV1_0.75_160)
+ int model_id = (argc > 2) ? std::strtol(argv[2], nullptr, 10) : 0;
+ ARM_COMPUTE_ERROR_ON_MSG(model_id > 1, "Invalid model ID. Model must be 0 (MobileNetV1_1.0_224) or 1 (MobileNetV1_0.75_160)");
+ int layout_id = (argc > 3) ? std::strtol(argv[3], nullptr, 10) : 0;
+ ARM_COMPUTE_ERROR_ON_MSG(layout_id > 1, "Invalid layout ID. Layout must be 0 (NCHW) or 1 (NHWC)");
+
+    float        depth_scale  = (model_id == 0) ? 1.f : 0.75f;
+ unsigned int spatial_size = (model_id == 0) ? 224 : 160;
+ std::string model_path = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/";
+ TensorDescriptor input_descriptor_nchw = TensorDescriptor(TensorShape(spatial_size, spatial_size, 3U, 1U), DataType::F32);
+ TensorDescriptor input_descriptor_nhwc = TensorDescriptor(TensorShape(3U, spatial_size, spatial_size, 1U), DataType::F32).set_layout(DataLayout::NHWC);
+ TensorDescriptor input_descriptor = (layout_id == 0) ? input_descriptor_nchw : input_descriptor_nhwc;
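+    // TensorShape lists dimensions fastest-moving first, so the NHWC
+    // descriptor places the channels (3U) in dimension 0 while the NCHW
+    // descriptor places the width first.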
+
+ // Parse arguments
+ if(argc < 2)
+ {
+ // Print help
+ std::cout << "Usage: " << argv[0] << " [target] [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
+ std::cout << "No data layout provided: using NCHW\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 2)
+ {
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
+ std::cout << "No data layout provided: using NCHW\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 3)
+ {
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No data layout provided: using NCHW\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 4)
+ {
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No data folder provided: using random values\n\n";
+ }
+ else if(argc == 5)
+ {
+ data_path = argv[4];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [image] [labels] [fast_math_hint]\n\n";
+ std::cout << "No image provided: using random values\n\n";
+ std::cout << "No text file with labels provided: skipping output accessor\n\n";
+ }
+ else if(argc == 6)
+ {
+ data_path = argv[4];
+ image = argv[5];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
+ std::cout << "No text file with labels provided: skipping output accessor\n\n";
+ }
+ else if(argc == 7)
+ {
+ data_path = argv[4];
+ image = argv[5];
+ label = argv[6];
+ std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
+ std::cout << "No fast math info provided: disabling fast math\n\n";
+ }
+ else
+ {
+ data_path = argv[4];
+ image = argv[5];
+ label = argv[6];
+      fast_math_hint = (std::strtol(argv[7], nullptr, 10) == 0) ? FastMathHint::Disabled : FastMathHint::Enabled; // base must be 10; base 1 is invalid for strtol
+ }
+
+ // Add model path to data path
+ if(!data_path.empty())
+ {
+ data_path += model_path;
+ }
+
+ graph << target_hint
+ << convolution_hint
+ << depthwise_convolution_hint
+ << fast_math_hint
+ << InputLayer(input_descriptor,
+ get_input_accessor())
+ << ConvolutionLayer(
+ 3U, 3U, 32U * depth_scale,
+ get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))
+ .set_name("Conv2d_0")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"),
+ get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"),
+ get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name("Conv2d_0/BatchNorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6");
+ graph << get_dwsc_node(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << get_dwsc_node(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+ graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("Logits/AvgPool_1a")
+ << ConvolutionLayer(
+ 1U, 1U, 1001U,
+ get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW),
+ get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"),
+ PadStrideInfo(1, 1, 0, 0))
+ .set_name("Logits/Conv2d_1c_1x1")
+ << ReshapeLayer(TensorShape(1001U)).set_name("Reshape")
+ << SoftmaxLayer().set_name("Softmax")
+ << OutputLayer(get_output_accessor());
+
+ // Finalize graph
+ GraphConfig config;
+ config.use_tuner = (target == 2);
+ graph.finalize(target_hint, config);
+ }
+ void do_run()
+ {
+ run_benchmark(graph);
+ }
+
+private:
+ Stream graph{ 0, "MobileNetV1" };
+
+ ConcatLayer get_dwsc_node(const std::string &data_path, std::string &&param_path,
+ unsigned int conv_filt,
+ PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info)
+ {
+ std::string total_path = param_path + "_";
+ SubStream sg(graph);
+ sg << DepthwiseConvolutionLayer(
+ 3U, 3U,
+ get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ dwc_pad_stride_info)
+ .set_name(total_path + "depthwise/depthwise")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"),
+ get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"),
+ get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(total_path + "depthwise/BatchNorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6")
+ << ConvolutionLayer(
+ 1U, 1U, conv_filt,
+ get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW),
+ std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
+ conv_pad_stride_info)
+ .set_name(total_path + "pointwise/Conv2D")
+ << BatchNormalizationLayer(
+ get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"),
+ get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"),
+ get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"),
+ get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"),
+ 0.001f)
+ .set_name(total_path + "pointwise/BatchNorm")
+ << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6");
+
+ return ConcatLayer(std::move(sg));
+ }
+};
+
+/** Main program for MobileNetV1
+ *
+ * @param[in] argc Number of arguments
+ * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner),
+ * [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160),
+ * [optional] Path to the weights folder,
+ * [optional] image,
+ * [optional] labels,
+ * [optional] data layout,
+ * [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ */
+int main(int argc, char **argv)
+{
+ GraphMobilenetExample example;
+
+ example.do_setup(argc, argv);
+ example.do_run();
+
+ return 0;
+}
diff --git a/runtime/contrib/custom_op/README.md b/runtime/contrib/custom_op/README.md
new file mode 100644
index 000000000..7815ce9d5
--- /dev/null
+++ b/runtime/contrib/custom_op/README.md
@@ -0,0 +1,25 @@
+This document covers the design of custom operator support in the runtime.
+
+# Introduction
+
+# Requirements
+
+- [ ] Support Tizen in-house custom op developers
+- [ ] Minimize exposed headers (structures and functions)
+- [ ] Provide acceptable performance
+
+# Design
+
+## Workflow
+
+![](customOp-workflow.png)
+
+## Candidate Architecture 1
+
+## Candidate Architecture 2
+
+## Discussion
+
+# Conclusion
diff --git a/runtime/contrib/custom_op/customOp-workflow.png b/runtime/contrib/custom_op/customOp-workflow.png
new file mode 100644
index 000000000..0487f5b63
--- /dev/null
+++ b/runtime/contrib/custom_op/customOp-workflow.png
Binary files differ
diff --git a/runtime/contrib/detection/CMakeLists.txt b/runtime/contrib/detection/CMakeLists.txt
new file mode 100644
index 000000000..37d91b527
--- /dev/null
+++ b/runtime/contrib/detection/CMakeLists.txt
@@ -0,0 +1,11 @@
+if(NOT BUILD_DETECTION_APP)
+ return()
+endif(NOT BUILD_DETECTION_APP)
+
+nnfw_find_package(Tensorflow REQUIRED)
+
+list(APPEND SOURCES detection.cpp)
+
+add_executable(detection ${SOURCES})
+target_link_libraries(detection nnfw_lib_misc)
+target_link_libraries(detection tensorflow-core)
diff --git a/runtime/contrib/detection/detection.cpp b/runtime/contrib/detection/detection.cpp
new file mode 100644
index 000000000..8fe78ca57
--- /dev/null
+++ b/runtime/contrib/detection/detection.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tensorflow/core/public/session.h>
+
+#include <iostream>
+#include <stdexcept>
+
+#include <cassert>
+#include <cstring>
+
+#include "misc/benchmark.h"
+
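+// Wrapper for TensorFlow calls that return a Status: throw a descriptive
+// std::runtime_error whenever the call does not succeed.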
+#define CHECK_TF(e) \
+ { \
+ if (!(e).ok()) \
+ { \
+ throw std::runtime_error{"'" #e "' FAILED"}; \
+ } \
+ }
+
+int main(int argc, char **argv)
+{
+ if (argc < 2)
+ {
+ std::cerr << "USAGE: " << argv[0] << " [T/F model path] [output 0] [output 1] ..." << std::endl;
+ return 255;
+ }
+
+ std::vector<std::string> output_nodes;
+
+ for (int argn = 2; argn < argc; ++argn)
+ {
+ output_nodes.emplace_back(argv[argn]);
+ }
+
+ tensorflow::Session *sess;
+
+ CHECK_TF(tensorflow::NewSession(tensorflow::SessionOptions(), &sess));
+
+ tensorflow::GraphDef graph_def;
+
+ CHECK_TF(ReadBinaryProto(tensorflow::Env::Default(), argv[1], &graph_def));
+ CHECK_TF(sess->Create(graph_def));
+
+ tensorflow::Tensor input(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, 320, 320, 3}));
+ std::vector<tensorflow::Tensor> outputs;
+
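+  // Run the session five times on a dummy 1x320x320x3 float input and report per-run latency.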
+ for (uint32_t n = 0; n < 5; ++n)
+ {
+ std::chrono::milliseconds elapsed(0);
+
+ nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+ CHECK_TF(sess->Run({{"input_node", input}}, output_nodes, {}, &outputs));
+ };
+
+ std::cout << "Takes " << elapsed.count() << "ms" << std::endl;
+ }
+
+ return 0;
+}
diff --git a/runtime/contrib/heap_trace/CMakeLists.txt b/runtime/contrib/heap_trace/CMakeLists.txt
new file mode 100644
index 000000000..1f18152d8
--- /dev/null
+++ b/runtime/contrib/heap_trace/CMakeLists.txt
@@ -0,0 +1,18 @@
+if(NOT BUILD_HEAP_TRACE)
+ return()
+endif(NOT BUILD_HEAP_TRACE)
+
+add_library(heap_trace SHARED src/cl_create_buffer_stub.cc
+ src/cl_release_mem_object.cc
+ src/free_stub.cc
+ src/malloc_stub.cc
+ src/realloc_stub.cc
+ src/valloc_stub.cc
+ src/symbol_searcher.cc
+ src/trace.cc
+)
+target_link_libraries(heap_trace PRIVATE ${CMAKE_DL_LIBS})
+
+add_subdirectory(tests)
+
+install(TARGETS heap_trace DESTINATION lib)
diff --git a/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
new file mode 100644
index 000000000..d9d2700ee
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/cl_create_buffer_stub.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <CL/cl.h>
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
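+// Interposed (e.g. via LD_PRELOAD) in front of the real clCreateBuffer: forward the
+// call, then log the allocation unless tracing is unavailable or we are already
+// inside the tracer.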
+cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr,
+ cl_int *errcode_ret)
+{
+ static auto isOriginalFunctionCallSuccessful = [](cl_mem result) -> bool { return result; };
+
+ static auto originalFunction =
+ findFunctionByName<cl_mem, cl_context, cl_mem_flags, size_t, void *, cl_int *>(
+ "clCreateBuffer");
+ cl_mem result = originalFunction(context, flags, size, host_ptr, errcode_ret);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(result, size);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/cl_release_mem_object.cc b/runtime/contrib/heap_trace/src/cl_release_mem_object.cc
new file mode 100644
index 000000000..f2f249e0b
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/cl_release_mem_object.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <CL/cl.h>
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+cl_int clReleaseMemObject(cl_mem mem)
+{
+ static auto isOriginalFunctionCallSuccessful = [](cl_int result) -> bool {
+ return result == CL_SUCCESS;
+ };
+
+ auto originalFunction = findFunctionByName<cl_int, cl_mem>("clReleaseMemObject");
+ cl_int result = originalFunction(mem);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logDeallocationEvent(mem);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/free_stub.cc b/runtime/contrib/heap_trace/src/free_stub.cc
new file mode 100644
index 000000000..31af63c8a
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/free_stub.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+void free(void *p) noexcept
+{
+ static auto originalFunction = findFunctionByName<void, void *>("free");
+ originalFunction(p);
+ if (!Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logDeallocationEvent(p);
+ }
+}
+}
diff --git a/runtime/contrib/heap_trace/src/function_resolver.h b/runtime/contrib/heap_trace/src/function_resolver.h
new file mode 100644
index 000000000..9b6879539
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/function_resolver.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FUNCTION_RESOLVER_H
+#define FUNCTION_RESOLVER_H
+
+#include "symbol_searcher.h"
+
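+// Resolve function_name at runtime and cast the raw symbol address to a typed
+// function pointer of type ReturnType (*)(ArgTypes...).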
+template <typename ReturnType, typename... ArgTypes>
+ReturnType (*findFunctionByName(const char *function_name))(ArgTypes...)
+{
+ auto found_symbol = findSymbol(function_name);
+ return reinterpret_cast<ReturnType (*)(ArgTypes...)>(found_symbol);
+}
+
+#endif // ! FUNCTION_RESOLVER_H
diff --git a/runtime/contrib/heap_trace/src/malloc_stub.cc b/runtime/contrib/heap_trace/src/malloc_stub.cc
new file mode 100644
index 000000000..50124d164
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/malloc_stub.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
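+// Forward to the real malloc, then log the allocation only when it succeeded and
+// the tracer is both initialized and not already on the current call stack.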
+void *malloc(size_t sz) noexcept
+{
+ static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+
+ static auto originalFunction = findFunctionByName<void *, size_t>("malloc");
+ void *result = originalFunction(sz);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(result, sz);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/realloc_stub.cc b/runtime/contrib/heap_trace/src/realloc_stub.cc
new file mode 100644
index 000000000..ce4569b0e
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/realloc_stub.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
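+// A successful realloc conceptually frees the old block and allocates a new one,
+// so both a deallocation and an allocation event are logged.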
+void *realloc(void *ptr, size_t sz) noexcept
+{
+ static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+
+ static auto originalFunction = findFunctionByName<void *, void *, size_t>("realloc");
+ void *result = originalFunction(ptr, sz);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logDeallocationEvent(ptr);
+ GlobalTrace->logAllocationEvent(result, sz);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/src/symbol_searcher.cc b/runtime/contrib/heap_trace/src/symbol_searcher.cc
new file mode 100644
index 000000000..cf83f2f7b
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/symbol_searcher.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "symbol_searcher.h"
+
+#include <dlfcn.h>
+#include <link.h>
+
+struct SymbolDescription
+{
+ const char *name;
+ void *address = nullptr; // address in memory where this symbol can be found
+
+ SymbolDescription(const char *name) : name(name) {}
+};
+
+using InfoAboutLoadedLib = struct dl_phdr_info *;
+
+static void tryToFindSymbolInLinkedLibraries(SymbolDescription &symbol);
+static void tryToFindSymbolInAllLoadedLibraries(SymbolDescription &symbol);
+static int checkIfLibraryContainsSymbol(InfoAboutLoadedLib library_description, size_t /* size */,
+ void *data);
+static bool isSymbolAddressNotInTheSameTranslationUnit(SymbolDescription *symbol);
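+
+// To find the "real" implementation of an interposed function, first try the next
+// symbol in link order (RTLD_NEXT); if that fails, scan every loaded library and
+// reject addresses that resolve back to this translation unit (our own stubs).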
+void *findSymbol(const char *name)
+{
+ SymbolDescription symbol(name);
+ tryToFindSymbolInLinkedLibraries(symbol);
+ if (!symbol.address)
+ {
+ tryToFindSymbolInAllLoadedLibraries(symbol);
+ }
+
+ return symbol.address;
+}
+
+static void tryToFindSymbolInLinkedLibraries(SymbolDescription &symbol)
+{
+ symbol.address = dlsym(RTLD_NEXT, symbol.name);
+}
+
+static void tryToFindSymbolInAllLoadedLibraries(SymbolDescription &symbol)
+{
+ dl_iterate_phdr(checkIfLibraryContainsSymbol, &symbol);
+}
+
+static int checkIfLibraryContainsSymbol(InfoAboutLoadedLib library_description, size_t /* size */,
+ void *data)
+{
+ SymbolDescription *symbol = (SymbolDescription *)data;
+
+  void *handle = dlopen(library_description->dlpi_name, RTLD_NOW);
+  if (!handle)
+  {
+    return 0; // library could not be reopened; keep iterating
+  }
+  symbol->address = dlsym(handle, symbol->name);
+  dlclose(handle);
+ if (symbol->address && isSymbolAddressNotInTheSameTranslationUnit(symbol))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+static bool isSymbolAddressNotInTheSameTranslationUnit(SymbolDescription *symbol)
+{
+ void *handle = dlopen("", RTLD_NOW);
+ void *addressInTheSameTranslationUnit = dlsym(handle, symbol->name);
+ dlclose(handle);
+
+ return addressInTheSameTranslationUnit == nullptr ||
+ addressInTheSameTranslationUnit != symbol->address;
+}
diff --git a/runtime/contrib/heap_trace/src/symbol_searcher.h b/runtime/contrib/heap_trace/src/symbol_searcher.h
new file mode 100644
index 000000000..f70a4e46f
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/symbol_searcher.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SYMBOL_SEARCHER_H
+#define SYMBOL_SEARCHER_H
+
+void *findSymbol(const char *name);
+
+#endif // ! SYMBOL_SEARCHER_H
diff --git a/runtime/contrib/heap_trace/src/trace.cc b/runtime/contrib/heap_trace/src/trace.cc
new file mode 100644
index 000000000..82f2915cb
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/trace.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+
+#include <memory>
+
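+// Constructed eagerly when the shared library is loaded; the stubs consult
+// Trace::Guard before dereferencing it.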
+std::unique_ptr<Trace> GlobalTrace(new Trace);
+
+bool Trace::Guard::_is_trace_not_available = true;
+thread_local bool Trace::Guard::_is_recursion_detected = false;
+
+Trace::Trace()
+{
+  if (!_out.is_open())
+  {
+    // getenv may return nullptr when HEAP_TRACE_LOG is unset; only open a stream
+    // when a log file name was actually provided
+    const char *log_file_name = getLogFileNameFromEnvVariable("HEAP_TRACE_LOG");
+    if (log_file_name)
+    {
+      _out.open(log_file_name);
+    }
+  }
+
+ Guard{}.markTraceAsReady();
+}
+
+const char *Trace::getLogFileNameFromEnvVariable(const char *env_variable_name)
+{
+ return getenv(env_variable_name);
+}
+
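+// Totals only ever grow; current usage is (allocated - deallocated), and the peak
+// is updated whenever that difference reaches a new maximum.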
+void Trace::logAllocationEvent(void *memory_ptr, size_t size_of_allocated_space_in_bytes)
+{
+ Guard{}.signalizeAboutPossibleRecursion();
+ std::lock_guard<std::mutex> guard(_lock);
+ _total_allocated_bytes_on_cpu += size_of_allocated_space_in_bytes;
+ if (_peak_heap_usage_on_cpu < _total_allocated_bytes_on_cpu - _total_deallocated_bytes_on_cpu)
+ {
+ _peak_heap_usage_on_cpu = _total_allocated_bytes_on_cpu - _total_deallocated_bytes_on_cpu;
+ }
+ _memory_in_use_on_cpu[memory_ptr] = size_of_allocated_space_in_bytes;
+  Guard{}.signalizeThatDangerOfRecursionHasPassed();
+}
+
+void Trace::logDeallocationEvent(void *memory_ptr)
+{
+ Guard{}.signalizeAboutPossibleRecursion();
+ std::lock_guard<std::mutex> guard(_lock);
+ auto found_memory_space_description = _memory_in_use_on_cpu.find(memory_ptr);
+ if (found_memory_space_description != _memory_in_use_on_cpu.end())
+ {
+ _total_deallocated_bytes_on_cpu += found_memory_space_description->second;
+ _memory_in_use_on_cpu.erase(found_memory_space_description);
+ }
+  Guard{}.signalizeThatDangerOfRecursionHasPassed();
+}
+
+void Trace::logAllocationEvent(cl_mem memory_ptr, size_t size_of_allocated_space_in_bytes)
+{
+ Guard{}.signalizeAboutPossibleRecursion();
+ std::lock_guard<std::mutex> guard(_lock);
+ _total_allocated_bytes_on_gpu += size_of_allocated_space_in_bytes;
+ if (_peak_heap_usage_on_gpu < _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu)
+ {
+ _peak_heap_usage_on_gpu = _total_allocated_bytes_on_gpu - _total_deallocated_bytes_on_gpu;
+ }
+ _memory_in_use_on_gpu[memory_ptr] = size_of_allocated_space_in_bytes;
+  Guard{}.signalizeThatDangerOfRecursionHasPassed();
+}
+
+void Trace::logDeallocationEvent(cl_mem memory_ptr)
+{
+ Guard{}.signalizeAboutPossibleRecursion();
+ std::lock_guard<std::mutex> guard(_lock);
+ auto found_memory_space_description = _memory_in_use_on_gpu.find(memory_ptr);
+ if (found_memory_space_description != _memory_in_use_on_gpu.end())
+ {
+ _total_deallocated_bytes_on_gpu += found_memory_space_description->second;
+ _memory_in_use_on_gpu.erase(found_memory_space_description);
+ }
+  Guard{}.signalizeThatDangerOfRecursionHasPassed();
+}
+
+Trace::~Trace()
+{
+ Guard{}.markTraceAsNotReady();
+
+ _out << "On CPU - Peak heap usage: " << _peak_heap_usage_on_cpu
+ << " B, Total allocated: " << _total_allocated_bytes_on_cpu
+ << " B, Total deallocated: " << _total_deallocated_bytes_on_cpu << " B\n";
+ _out << "On GPU - Peak mem usage: " << _peak_heap_usage_on_gpu
+ << " B, Total allocated: " << _total_allocated_bytes_on_gpu
+ << " B, Total deallocated: " << _total_deallocated_bytes_on_gpu << " B\n";
+}
diff --git a/runtime/contrib/heap_trace/src/trace.h b/runtime/contrib/heap_trace/src/trace.h
new file mode 100644
index 000000000..f03a65a58
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/trace.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRACE_H_
+#define TRACE_H_
+
+#include <CL/cl.h>
+
+#include <unordered_map>
+#include <fstream>
+#include <mutex>
+
+class Trace
+{
+public:
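+  // Guard tells a stub when logging is safe: the trace must be initialized and the
+  // current thread must not already be inside the tracer (logging allocates, so an
+  // unguarded re-entry would recurse).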
+ class Guard
+ {
+ friend class Trace;
+
+ public:
+ bool isActive() { return _is_trace_not_available || _is_recursion_detected; }
+
+ private:
+ void markTraceAsReady() { _is_trace_not_available = false; }
+ void markTraceAsNotReady() { _is_trace_not_available = true; }
+ void signalizeAboutPossibleRecursion() { _is_recursion_detected = true; }
+    void signalizeThatDangerOfRecursionHasPassed() { _is_recursion_detected = false; }
+
+ private:
+ static bool _is_trace_not_available;
+ static thread_local bool _is_recursion_detected;
+ };
+
+public:
+ Trace();
+ Trace(const Trace &) = delete;
+ const Trace &operator=(const Trace &) = delete;
+
+ void logAllocationEvent(void *memory_ptr, size_t size_of_allocated_space_in_bytes);
+ void logAllocationEvent(cl_mem memory_ptr, size_t size_of_allocated_space_in_bytes);
+ void logDeallocationEvent(void *memory_ptr);
+ void logDeallocationEvent(cl_mem memory_ptr);
+
+ ~Trace();
+
+private:
+ const char *getLogFileNameFromEnvVariable(const char *env_variable_name);
+
+private:
+ std::mutex _lock;
+ std::ofstream _out;
+ size_t _total_allocated_bytes_on_cpu = 0;
+ size_t _total_deallocated_bytes_on_cpu = 0;
+ size_t _peak_heap_usage_on_cpu = 0;
+ size_t _total_allocated_bytes_on_gpu = 0;
+ size_t _total_deallocated_bytes_on_gpu = 0;
+ size_t _peak_heap_usage_on_gpu = 0;
+ std::unordered_map<void *, size_t> _memory_in_use_on_cpu;
+ std::unordered_map<cl_mem, size_t> _memory_in_use_on_gpu;
+};
+
+#endif // ! TRACE_H_
diff --git a/runtime/contrib/heap_trace/src/valloc_stub.cc b/runtime/contrib/heap_trace/src/valloc_stub.cc
new file mode 100644
index 000000000..24e91bd11
--- /dev/null
+++ b/runtime/contrib/heap_trace/src/valloc_stub.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trace.h"
+#include "function_resolver.h"
+
+#include <memory>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+extern "C" {
+
+void *valloc(size_t sz) noexcept
+{
+ static auto isOriginalFunctionCallSuccessful = [](void *result) -> bool { return result; };
+
+ static auto originalFunction = findFunctionByName<void *, size_t>("valloc");
+ void *result = originalFunction(sz);
+ if (isOriginalFunctionCallSuccessful(result) && !Trace::Guard{}.isActive())
+ {
+ GlobalTrace->logAllocationEvent(result, sz);
+ }
+
+ return result;
+}
+}
diff --git a/runtime/contrib/heap_trace/tests/CMakeLists.txt b/runtime/contrib/heap_trace/tests/CMakeLists.txt
new file mode 100644
index 000000000..8fbe5dec1
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(HEAP_TRACE_TESTS heap_trace_test)
+
+find_package(OpenCL REQUIRED)
+
+add_library(test_sample1 SHARED src/test_sample1/test_sample1.cc)
+
+add_library(test_sample2 SHARED src/test_sample2/test_sample2.cc)
+target_link_libraries(test_sample2 test_sample3)
+
+add_library(test_sample3 SHARED src/test_sample3/test_sample3.cc)
+
+add_library(test_sample4 SHARED src/test_sample4/test_sample4.cc)
+
+add_executable(${HEAP_TRACE_TESTS} src/cl_release_mem_object_interception_test.cc
+ src/symbol_searcher_test.cc
+ src/cl_create_buffer_interception_test.cc
+ src/free_interception_test.cc
+ src/main.cc
+ src/common_test_environment.cc
+ src/trace_test.cc
+ src/malloc_interception_test.cc
+ src/realloc_interception_test.cc
+ src/valloc_interception_test.cc
+ src/file_content_manipulations.cc
+ )
+target_include_directories(${HEAP_TRACE_TESTS} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
+target_link_libraries(${HEAP_TRACE_TESTS} test_sample1)
+target_link_libraries(${HEAP_TRACE_TESTS} heap_trace)
+target_link_libraries(${HEAP_TRACE_TESTS} test_sample4)
+target_link_libraries(${HEAP_TRACE_TESTS} gtest)
+target_link_libraries(${HEAP_TRACE_TESTS} gtest_main)
+target_link_libraries(${HEAP_TRACE_TESTS} ${LIB_PTHREAD})
+target_link_libraries(${HEAP_TRACE_TESTS} ${CMAKE_DL_LIBS})
+target_link_libraries(${HEAP_TRACE_TESTS} ${OpenCL_LIBRARY})
+target_link_libraries(${HEAP_TRACE_TESTS} stdc++fs)
+
+add_test(${HEAP_TRACE_TESTS} ${HEAP_TRACE_TESTS})
+
+install(TARGETS test_sample1 DESTINATION unittest)
+install(TARGETS test_sample2 DESTINATION unittest)
+install(TARGETS test_sample3 DESTINATION unittest)
+install(TARGETS test_sample4 DESTINATION unittest)
+install(TARGETS ${HEAP_TRACE_TESTS} DESTINATION unittest)
diff --git a/runtime/contrib/heap_trace/tests/src/cl_create_buffer_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_create_buffer_interception_test.cc
new file mode 100644
index 000000000..33d01868e
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/cl_create_buffer_interception_test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <CL/cl.h>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct ClCreateBufferStub : public TestEnv
+{
+ cl_context context;
+
+ ClCreateBufferStub() : TestEnv("./cl_create_buffer_interception_test.log") {}
+
+ void SetUp() final
+ {
+ cl_device_id device_id;
+ int err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
+ context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
+
+ TestEnv::SetUp();
+ }
+
+ void TearDown() final
+ {
+ TestEnv::TearDown();
+
+ clReleaseContext(context);
+ }
+};
+
+TEST_F(ClCreateBufferStub, must_allocate_space_as_standard_ocl_function)
+{
+ cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024 * 1024, NULL, NULL);
+
+ ASSERT_TRUE(mem);
+
+ clReleaseMemObject(mem);
+}
+
+TEST_F(ClCreateBufferStub, must_log_allocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ clReleaseMemObject(clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, NULL));
+
+ GlobalTrace.reset(new Trace);
+ clReleaseMemObject(clCreateBuffer(context, CL_MEM_READ_WRITE, 128, NULL, NULL));
+ clReleaseMemObject(clCreateBuffer(context, CL_MEM_READ_WRITE, 64, NULL, NULL));
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./cl_create_buffer_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 128 B, Total allocated: 192 B, Total deallocated: 192 B\n");
+}
+
+TEST_F(ClCreateBufferStub,
+ must_not_do_the_record_about_allocation_event_if_original_function_failed)
+{
+ cl_context badContext = nullptr;
+ cl_mem p = clCreateBuffer(badContext, CL_MEM_READ_WRITE, 1024, nullptr, nullptr);
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(p);
+ ASSERT_STREQ(getContentOfFile("./cl_create_buffer_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
new file mode 100644
index 000000000..ddd1a0d5c
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/cl_release_mem_object_interception_test.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <CL/cl.h>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct ClReleaseMemObjectStub : public TestEnv
+{
+ cl_context context;
+
+ ClReleaseMemObjectStub() : TestEnv("./cl_release_mem_object_interception_test.log") {}
+
+ void SetUp() final
+ {
+ cl_device_id device_id;
+ int err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
+ context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
+
+ TestEnv::SetUp();
+ }
+
+ void TearDown() final
+ {
+ TestEnv::TearDown();
+
+ clReleaseContext(context);
+ }
+};
+
+TEST_F(ClReleaseMemObjectStub, should_work_as_standard_version)
+{
+ cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, NULL);
+ clReleaseMemObject(mem);
+ ASSERT_TRUE(mem);
+}
+
+TEST_F(ClReleaseMemObjectStub, must_log_deallocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ cl_mem mem1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, NULL);
+ ASSERT_TRUE(mem1);
+ clReleaseMemObject(mem1);
+
+ GlobalTrace.reset(new Trace);
+ cl_mem mem2 = clCreateBuffer(context, CL_MEM_READ_WRITE, 128, NULL, NULL);
+ cl_mem mem3 = clCreateBuffer(context, CL_MEM_READ_WRITE, 64, NULL, NULL);
+ ASSERT_TRUE(mem2 && mem3);
+ clReleaseMemObject(mem2);
+ clReleaseMemObject(mem3);
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 192 B, Total allocated: 192 B, Total deallocated: 192 B\n");
+}
+
+TEST_F(ClReleaseMemObjectStub, must_not_log_deallocation_event_if_original_function_failed)
+{
+  cl_mem mem = nullptr;
+ ASSERT_NE(clReleaseMemObject(mem), CL_SUCCESS);
+
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./cl_release_mem_object_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/common_test_environment.cc b/runtime/contrib/heap_trace/tests/src/common_test_environment.cc
new file mode 100644
index 000000000..e5c4dc121
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/common_test_environment.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+
+#include "trace.h"
+
+#include <experimental/filesystem>
+
+namespace fs = std::experimental::filesystem;
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
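+// Each test points HEAP_TRACE_LOG at its own file and rebuilds GlobalTrace, so the
+// resulting log contains only the events produced by that test.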
+void TestEnv::SetUp() { configureTraceToMakeLogInFile(); }
+
+void TestEnv::TearDown() { removeOldTraceLogIfNeed(); }
+
+void TestEnv::configureTraceToMakeLogInFile()
+{
+ removeOldTraceLogIfNeed();
+ setNewNameOfTraceLog();
+}
+
+void TestEnv::removeOldTraceLogIfNeed()
+{
+ GlobalTrace.reset();
+ const char *trace_log_name = getenv("HEAP_TRACE_LOG");
+ if (trace_log_name)
+ {
+ fs::remove(trace_log_name);
+ }
+}
+
+void TestEnv::setNewNameOfTraceLog()
+{
+ setenv("HEAP_TRACE_LOG", test_log_file.c_str(), 1);
+ GlobalTrace.reset(new ::Trace);
+}
diff --git a/runtime/contrib/heap_trace/tests/src/common_test_environment.h b/runtime/contrib/heap_trace/tests/src/common_test_environment.h
new file mode 100644
index 000000000..8c4c9e396
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/common_test_environment.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_TEST_ENVIRONMENT_H
+#define COMMON_TEST_ENVIRONMENT_H
+
+#include "gtest/gtest.h"
+
+struct TestEnv : public ::testing::Test
+{
+ TestEnv(const char *log_file) : test_log_file(log_file) {}
+
+ void SetUp() override;
+ void TearDown() override;
+
+private:
+ void configureTraceToMakeLogInFile();
+ void removeOldTraceLogIfNeed();
+ void setNewNameOfTraceLog();
+
+private:
+ std::string test_log_file;
+};
+
+#endif // ! COMMON_TEST_ENVIRONMENT_H
diff --git a/runtime/contrib/heap_trace/tests/src/file_content_manipulations.cc b/runtime/contrib/heap_trace/tests/src/file_content_manipulations.cc
new file mode 100644
index 000000000..0c968ab4b
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/file_content_manipulations.cc
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_content_manipulations.h"
+
+#include <fstream>
+
+std::string getContentOfFile(const char *file_name)
+{
+ std::ifstream in(file_name);
+ return std::string((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
+}
diff --git a/runtime/contrib/heap_trace/tests/src/file_content_manipulations.h b/runtime/contrib/heap_trace/tests/src/file_content_manipulations.h
new file mode 100644
index 000000000..ba938931c
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/file_content_manipulations.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_SUPPORT_H
+#define TEST_SUPPORT_H
+
+#include <string>
+
+std::string getContentOfFile(const char *file_name);
+
+#endif //! TEST_SUPPORT_H
diff --git a/runtime/contrib/heap_trace/tests/src/free_interception_test.cc b/runtime/contrib/heap_trace/tests/src/free_interception_test.cc
new file mode 100644
index 000000000..e690718d0
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/free_interception_test.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct FreeStub : public TestEnv
+{
+ FreeStub() : TestEnv("./free_interception_test.log") {}
+};
+
+TEST_F(FreeStub, should_work_as_standard_version)
+{
+ void *p = malloc(1024);
+ free(p);
+ ASSERT_TRUE(p);
+ // TODO Bad test. Need use death test from Google test framework
+}
+
+TEST_F(FreeStub, must_log_deallocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ void *p1 = malloc(1024);
+ ASSERT_TRUE(p1);
+ free(p1);
+
+ GlobalTrace.reset(new Trace);
+ void *p2 = malloc(128);
+ void *p3 = malloc(64);
+ ASSERT_TRUE(p2 && p3);
+ free(p2);
+ free(p3);
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./free_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 192 B, Total allocated: 192 B, Total deallocated: 192 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/main.cc b/runtime/contrib/heap_trace/tests/src/main.cc
new file mode 100644
index 000000000..b53b2030b
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/main.cc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+int main(int argc, char *argv[])
+{
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
new file mode 100644
index 000000000..7a2f5a915
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/malloc_interception_test.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <experimental/filesystem>
+#include <limits>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct MallocStub : public TestEnv
+{
+ MallocStub() : TestEnv("./malloc_interception_test.log") {}
+};
+
+TEST_F(MallocStub, must_allocate_space_as_standard_malloc)
+{
+ void *p = malloc(128);
+
+ ASSERT_TRUE(p);
+ free(p);
+}
+
+TEST_F(MallocStub, must_log_allocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ void *p1 = malloc(1024);
+
+ GlobalTrace.reset(new Trace);
+ void *p2 = malloc(128);
+ void *p3 = malloc(64);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p1 && p2 && p3);
+ ASSERT_STREQ(getContentOfFile("./malloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 192 B, Total allocated: 192 B, Total deallocated: 0 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ free(p1);
+ free(p2);
+ free(p3);
+}
+
+TEST_F(MallocStub, must_not_do_the_record_about_allocation_event_if_original_function_failed)
+{
+ void *p = malloc(std::numeric_limits<size_t>::max());
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(p);
+ ASSERT_STREQ(getContentOfFile("./malloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+TEST_F(
+ MallocStub,
+    should_not_influence_on_trace_results_even_if_original_function_return_any_not_null_ptr_when_incoming_size_is_zero)
+{
+ void *p = malloc(0);
+ free(p);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p);
+ ASSERT_STREQ(getContentOfFile("./malloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
new file mode 100644
index 000000000..160a19beb
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/realloc_interception_test.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <experimental/filesystem>
+#include <limits>
+#include <cstring>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct ReallocStub : public TestEnv
+{
+ ReallocStub() : TestEnv("./realloc_interception_test.log") {}
+};
+
+TEST_F(ReallocStub, must_allocate_space_as_standard_realloc)
+{
+ void *p = malloc(128);
+ p = realloc(p, 1024);
+
+ ASSERT_TRUE(p);
+ free(p);
+}
+
+TEST_F(ReallocStub, must_log_allocation_deallocation_events_if_trace_is_ready_for_it)
+{
+ std::array<char, 1024> reference_data;
+ reference_data.fill('a');
+ void *p1 = malloc(1024);
+ memcpy(p1, reference_data.data(), reference_data.size());
+ void *p2 = realloc(p1, 64);
+ void *p3 = realloc(p2, 128);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p3);
+ ASSERT_TRUE(memcmp(p3, reference_data.data(), 64) == 0);
+ ASSERT_STREQ(getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 1024 B, Total allocated: 1216 B, Total deallocated: 1088 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ free(p3);
+}
+
+TEST_F(ReallocStub,
+ must_not_do_the_record_about_allocation_deallocation_events_if_original_function_failed)
+{
+ GlobalTrace.reset();
+ void *p = malloc(128);
+ GlobalTrace.reset(new Trace);
+
+ void *ptr_after_realloc = realloc(p, std::numeric_limits<size_t>::max());
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(ptr_after_realloc);
+ ASSERT_STREQ(getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+
+ free(p);
+}
+
+TEST_F(ReallocStub, should_work_as_malloc_when_incoming_ptr_is_equal_to_nullptr)
+{
+ void *p = realloc(nullptr, 1024);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p);
+ ASSERT_STREQ(
+ getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 1024 B, Total allocated: 1024 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+
+ free(p);
+}
+
+TEST_F(
+ ReallocStub,
+    should_not_influence_on_trace_results_even_if_original_function_return_any_not_null_ptr_when_incoming_size_is_zero_and_ptr_is_null)
+{
+ void *p = realloc(nullptr, 0);
+ free(p);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p);
+ ASSERT_STREQ(getContentOfFile("./realloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
new file mode 100644
index 000000000..d615cc928
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/symbol_searcher_test.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "test_sample1.h"
+#include "test_sample2.h"
+#include "test_sample4.h"
+
+#include "symbol_searcher.h"
+
+#include <dlfcn.h>
+#include <linux/limits.h>
+#include <unistd.h>
+
+#include <cstdlib>
+
+#include <experimental/filesystem>
+
+namespace fs = std::experimental::filesystem;
+
+fs::path exePath()
+{
+ char result[PATH_MAX] = {0};
+  ssize_t count = readlink("/proc/self/exe", result, PATH_MAX - 1); // readlink does not NUL-terminate
+  if (count < 0)
+  {
+    return fs::path();
+  }
+  return fs::path(result).parent_path();
+}
+
+namespace backstage
+{
+
+struct SymbolSearcher : public ::testing::Test
+{
+};
+
+TEST_F(SymbolSearcher, should_find_symbol_in_linked_library)
+{
+ ASSERT_TRUE((void *)funcDefinedOnlyInTestSample4 == findSymbol("funcDefinedOnlyInTestSample4"));
+}
+
+TEST_F(SymbolSearcher, should_find_symbol_in_library_which_have_been_loaded_in_runtime)
+{
+ fs::path pathToTestLib = exePath() / "libtest_sample2.so";
+ void *handle = dlopen(pathToTestLib.c_str(), RTLD_NOW);
+
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(dlsym(handle, "funcDefinedOnlyInTestSample2") ==
+ findSymbol("funcDefinedOnlyInTestSample2"));
+ dlclose(handle);
+}
+
+TEST_F(SymbolSearcher,
+ should_ignore_symbols_found_in_current_translation_unit_if_there_is_another_alternative)
+{
+ fs::path pathToTestSample2 = exePath() / "libtest_sample2.so";
+ void *test_sample2_handle = dlopen(pathToTestSample2.c_str(), RTLD_NOW);
+ void *func_addr_in_test_sample2 =
+ dlsym(test_sample2_handle, "funcWhichCallFuncDefinedInTestSample3");
+
+ ASSERT_TRUE(test_sample2_handle);
+ ASSERT_TRUE((void *)funcDefinedInTestSample3_ButWrappedInTestSample1 !=
+ reinterpret_cast<void *(*)()>(func_addr_in_test_sample2)());
+
+ dlclose(test_sample2_handle);
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample1.h b/runtime/contrib/heap_trace/tests/src/test_sample1.h
new file mode 100644
index 000000000..3abea56a3
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample1.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_SAMPLE1_H
+#define TEST_SAMPLE1_H
+
+extern "C" {
+
+void *funcDefinedInTestSample3_ButWrappedInTestSample1();
+}
+
+#endif // ! TEST_SAMPLE1_H
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample1/test_sample1.cc b/runtime/contrib/heap_trace/tests/src/test_sample1/test_sample1.cc
new file mode 100644
index 000000000..1d2843707
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample1/test_sample1.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_sample1.h"
+
+#include "../../../src/symbol_searcher.h"
+
+extern "C" {
+
+void *funcDefinedInTestSample3_ButWrappedInTestSample1()
+{
+ return findSymbol("funcDefinedInTestSample3_ButWrappedInTestSample1");
+}
+}
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample2.h b/runtime/contrib/heap_trace/tests/src/test_sample2.h
new file mode 100644
index 000000000..785fc252d
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample2.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_SAMPLE2_H
+#define TEST_SAMPLE2_H
+
+extern "C" {
+
+void funcDefinedOnlyInTestSample2();
+void funcWhichCallFuncDefinedInTestSample3();
+}
+
+#endif // ! TEST_SAMPLE2_H
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample2/test_sample2.cc b/runtime/contrib/heap_trace/tests/src/test_sample2/test_sample2.cc
new file mode 100644
index 000000000..792dba665
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample2/test_sample2.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_sample2.h"
+#include "../test_sample3.h"
+
+extern "C" {
+
+void funcDefinedOnlyInTestSample2() {}
+void funcWhichCallFuncDefinedInTestSample3() { funcDefinedInTestSample3_ButWrappedInTestSample1(); }
+}
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample3.h b/runtime/contrib/heap_trace/tests/src/test_sample3.h
new file mode 100644
index 000000000..8f85b8bd6
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample3.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_SAMPLE3_H
+#define TEST_SAMPLE3_H
+
+extern "C" {
+
+void funcDefinedInTestSample3_ButWrappedInTestSample1();
+}
+
+#endif // ! TEST_SAMPLE3_H
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample3/test_sample3.cc b/runtime/contrib/heap_trace/tests/src/test_sample3/test_sample3.cc
new file mode 100644
index 000000000..ded78db85
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample3/test_sample3.cc
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+extern "C" {
+
+void funcDefinedInTestSample3_ButWrappedInTestSample1() {}
+}
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample4.h b/runtime/contrib/heap_trace/tests/src/test_sample4.h
new file mode 100644
index 000000000..47c9e8c92
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample4.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_SAMPLE4_H
+#define TEST_SAMPLE4_H
+
+extern "C" {
+
+void funcDefinedOnlyInTestSample4();
+}
+
+#endif // ! TEST_SAMPLE4_H
diff --git a/runtime/contrib/heap_trace/tests/src/test_sample4/test_sample4.cc b/runtime/contrib/heap_trace/tests/src/test_sample4/test_sample4.cc
new file mode 100644
index 000000000..55c96cb0e
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/test_sample4/test_sample4.cc
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_sample4.h"
+
+extern "C" {
+void funcDefinedOnlyInTestSample4() {}
+}
diff --git a/runtime/contrib/heap_trace/tests/src/trace_test.cc b/runtime/contrib/heap_trace/tests/src/trace_test.cc
new file mode 100644
index 000000000..1cf4c530b
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/trace_test.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <CL/cl.h>
+
+#include <experimental/filesystem>
+#include <array>
+#include <atomic>
+#include <fstream>
+#include <string>
+#include <thread>
+
+using namespace std;
+namespace fs = experimental::filesystem;
+
+extern unique_ptr<::Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct Trace : TestEnv
+{
+ Trace() : TestEnv("./trace_test.log") {}
+
+ void generateGarbageInTestLogFile();
+ template <typename MemType>
+ static void emulateAllocationEvent(size_t eventsPoolId, size_t numberOfEmulation,
+ size_t numberOfBytesPerOneEmulation, atomic_bool &isPauseNeed);
+};
+
+TEST_F(Trace,
+ must_create_log_file_with_name_defined_in_env_var_HEAP_TRACE_LOG_during_initialization)
+{
+ ASSERT_TRUE(fs::exists("./trace_test.log"));
+}
+
+TEST_F(Trace, must_truncate_log_file_if_it_exists_during_initialization)
+{
+ GlobalTrace.reset();
+ generateGarbageInTestLogFile();
+ GlobalTrace.reset(new ::Trace);
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./trace_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+void Trace::generateGarbageInTestLogFile()
+{
+ ofstream log("./trace_test.log");
+ log << string(256, 'a');
+}
+
+TEST_F(Trace, should_not_generate_any_records_in_log_during_creation)
+{
+ GlobalTrace.reset();
+
+ ASSERT_STREQ(getContentOfFile("./trace_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+TEST_F(Trace, can_signalize_to_users_if_it_is_ready_for_using)
+{
+ ASSERT_FALSE(::Trace::Guard().isActive());
+}
+
+TEST_F(Trace, must_signalize_that_it_is_not_ready_for_using_until_it_is_not_created)
+{
+ GlobalTrace.reset();
+ ASSERT_TRUE(::Trace::Guard().isActive());
+}
+
+TEST_F(Trace, should_work_correctly_in_multithreaded_environment)
+{
+ constexpr size_t numberOfThreads = 10, numberOfEmulations = 100,
+ numberOfBytesPerOneEmulation = 1024;
+ atomic_bool isPauseNeed{true};
+ array<thread, numberOfThreads> threads;
+ for (size_t i = 0; i < numberOfThreads / 2; ++i)
+ {
+ threads[i] = thread(emulateAllocationEvent<void *>, i, numberOfEmulations,
+ numberOfBytesPerOneEmulation, ref(isPauseNeed));
+ }
+ for (size_t i = numberOfThreads / 2; i < numberOfThreads; ++i)
+ {
+ threads[i] = thread(emulateAllocationEvent<cl_mem>, i, numberOfEmulations,
+ numberOfBytesPerOneEmulation, ref(isPauseNeed));
+ }
+
+ GlobalTrace.reset(new ::Trace);
+ isPauseNeed = false;
+
+ for (size_t i = 0; i < numberOfThreads; ++i)
+ {
+ threads[i].join();
+ }
+ GlobalTrace.reset();
+
+ string thisShouldBeInLogFile =
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ string andThisToo =
+ "Total allocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) +
+ " B, Total deallocated: " +
+ to_string(numberOfThreads / 2 * numberOfEmulations * numberOfBytesPerOneEmulation) + " B\n";
+ ASSERT_TRUE(getContentOfFile("./trace_test.log").find(thisShouldBeInLogFile) != string::npos);
+ ASSERT_TRUE(getContentOfFile("./trace_test.log").find(andThisToo) != string::npos);
+}
+
+template <typename MemType>
+void Trace::emulateAllocationEvent(size_t eventsPoolId, size_t numberOfEmulation,
+ size_t numberOfBytesPerOneEmulation, atomic_bool &isPauseNeed)
+{
+ while (isPauseNeed)
+ {
+ continue;
+ }
+
+ for (size_t i = 1; i <= numberOfEmulation; ++i)
+ {
+ GlobalTrace->logAllocationEvent((MemType)(i + numberOfEmulation * eventsPoolId),
+ numberOfBytesPerOneEmulation);
+ }
+
+ for (size_t i = 1; i <= numberOfEmulation; ++i)
+ {
+ GlobalTrace->logDeallocationEvent((MemType)(i + numberOfEmulation * eventsPoolId));
+ }
+}
+
+TEST_F(Trace, must_log_allocation_and_deallocation_events)
+{
+ void *memOnCPU1 = (void *)1, *memOnCPU2 = (void *)3;
+ cl_mem memOnGPU1 = (cl_mem)2, memOnGPU2 = (cl_mem)4;
+ GlobalTrace->logAllocationEvent(memOnCPU1, 347);
+ GlobalTrace->logDeallocationEvent(memOnCPU1);
+ GlobalTrace->logAllocationEvent(memOnGPU2, 592);
+ GlobalTrace->logDeallocationEvent(memOnGPU2);
+ GlobalTrace->logAllocationEvent(memOnGPU1, 349);
+ GlobalTrace->logDeallocationEvent(memOnGPU1);
+ GlobalTrace->logAllocationEvent(memOnCPU2, 568);
+ GlobalTrace->logDeallocationEvent(memOnCPU2);
+ GlobalTrace.reset();
+
+ string shouldBeInLogFile = "On CPU - Peak heap usage: " + to_string(568) +
+ " B, Total allocated: " + to_string(347 + 568) +
+ " B, Total deallocated: " + to_string(347 + 568) +
+ " B\n"
+ "On GPU - Peak mem usage: " +
+ to_string(592) + " B, Total allocated: " + to_string(592 + 349) +
+ " B, Total deallocated: " + to_string(592 + 349) + " B\n";
+ ASSERT_STREQ(getContentOfFile("./trace_test.log").c_str(), shouldBeInLogFile.c_str());
+}
+
+} // namespace backstage
diff --git a/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc b/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc
new file mode 100644
index 000000000..6b3d6df82
--- /dev/null
+++ b/runtime/contrib/heap_trace/tests/src/valloc_interception_test.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_test_environment.h"
+#include "file_content_manipulations.h"
+
+#include "trace.h"
+
+#include <experimental/filesystem>
+#include <limits>
+
+extern std::unique_ptr<Trace> GlobalTrace;
+
+namespace backstage
+{
+
+struct VallocStub : public TestEnv
+{
+ VallocStub() : TestEnv("./valloc_interception_test.log") {}
+};
+
+TEST_F(VallocStub, must_allocate_space_as_standard_valloc)
+{
+ void *p = valloc(128);
+
+ ASSERT_TRUE(p);
+ free(p);
+}
+
+TEST_F(VallocStub, must_log_allocation_events_if_trace_is_ready_for_it)
+{
+ GlobalTrace.reset();
+ void *p1 = valloc(1024);
+
+ GlobalTrace.reset(new Trace);
+ void *p2 = valloc(128);
+ void *p3 = valloc(64);
+ GlobalTrace.reset();
+
+ ASSERT_TRUE(p1 && p2 && p3);
+ ASSERT_STREQ(getContentOfFile("./valloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 192 B, Total allocated: 192 B, Total deallocated: 0 "
+ "B\nOn GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+ free(p1);
+ free(p2);
+ free(p3);
+}
+
+TEST_F(VallocStub, must_not_do_the_record_about_allocation_event_if_original_function_failed)
+{
+ void *p = valloc(std::numeric_limits<size_t>::max());
+ GlobalTrace.reset();
+
+ ASSERT_FALSE(p);
+ ASSERT_STREQ(getContentOfFile("./valloc_interception_test.log").c_str(),
+ "On CPU - Peak heap usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\nOn "
+ "GPU - Peak mem usage: 0 B, Total allocated: 0 B, Total deallocated: 0 B\n");
+}
+
+} // namespace backstage
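Taken together, the interception tests pin down the contract for an allocator hook: forward to the original function, stay silent while `GlobalTrace` is not ready (the `Guard` tests in trace_test.cc), and record nothing when the underlying call fails (the last `valloc` test above). A minimal sketch of a hook meeting that contract, assuming the `Trace` API these tests exercise and `_GNU_SOURCE` for `RTLD_NEXT`; the shipped heap_trace hooks may differ in detail:

```
// Hypothetical malloc hook: an illustration of the contract, not the real code.
#include "trace.h" // assumed to declare Trace and Trace::Guard as used by the tests

#include <dlfcn.h>

#include <cstddef>
#include <memory>

extern std::unique_ptr<Trace> GlobalTrace;

extern "C" void *malloc(size_t size)
{
  // Resolve the libc implementation once, bypassing this wrapper.
  static auto original = reinterpret_cast<void *(*)(size_t)>(dlsym(RTLD_NEXT, "malloc"));

  void *p = original(size);
  // Guard().isActive() is true whenever GlobalTrace is not ready, and a failed
  // allocation (p == nullptr) must leave no record in the log.
  if (p && !Trace::Guard().isActive())
    GlobalTrace->logAllocationEvent(p, size);
  return p;
}
```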
diff --git a/runtime/contrib/labs/CMakeLists.txt b/runtime/contrib/labs/CMakeLists.txt
new file mode 100644
index 000000000..57e28c11a
--- /dev/null
+++ b/runtime/contrib/labs/CMakeLists.txt
@@ -0,0 +1,5 @@
+if(NOT BUILD_LABS)
+ return()
+endif(NOT BUILD_LABS)
+
+add_subdirectories()
diff --git a/runtime/contrib/labs/jniacl/CMakeLists.txt b/runtime/contrib/labs/jniacl/CMakeLists.txt
new file mode 100644
index 000000000..6d05122a0
--- /dev/null
+++ b/runtime/contrib/labs/jniacl/CMakeLists.txt
@@ -0,0 +1,18 @@
+#
+# Simple Android JNI execution test of ACL
+#
+
+if(NOT ANDROID)
+ return()
+endif(NOT ANDROID)
+
+nnas_find_package(ARMCompute REQUIRED)
+
+link_directories(${CMAKE_INSTALL_PREFIX}/lib)
+
+set(JNIACL_SRCS src/jniacl_main.cc
+ src/io_accessor.cc)
+
+add_library(jniacl_jni SHARED ${JNIACL_SRCS})
+target_include_directories(jniacl_jni PUBLIC ${TFLITE_JNI_INCLUDES} src)
+target_link_libraries(jniacl_jni arm_compute_graph log)
diff --git a/runtime/contrib/labs/jniacl/src/io_accessor.cc b/runtime/contrib/labs/jniacl/src/io_accessor.cc
new file mode 100644
index 000000000..076c93f3d
--- /dev/null
+++ b/runtime/contrib/labs/jniacl/src/io_accessor.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "io_accessor.h"
+#include <ostream>
+#include <android/log.h>
+
+bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  // Fill each input element with a test value (incremented per element when _inc is set)
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _test_input;
+ _test_input += _inc ? 1.0 : 0.0;
+
+ __android_log_print(ANDROID_LOG_DEBUG, "LOG_TAG", "Input %d, %d = %lf\r\n", id.y(), id.x(),
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)));
+ });
+ return true;
+}
+
+bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  // Print each output element for inspection
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
+ __android_log_print(ANDROID_LOG_DEBUG, "Output", "Input %d, %d = %lf\r\n", id.y(), id.x(),
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)));
+ });
+ return false; // end the network
+}
+
+bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  // Fill each weight element with a test value (incremented per element when _inc is set)
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = _test_weight;
+ _test_weight += _inc ? 1.0 : 0.0;
+ });
+ return true;
+}
+
+bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+  // Fill every bias element with zero
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates &id) {
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) = 0.0;
+ });
+ return true;
+}
diff --git a/runtime/contrib/labs/jniacl/src/io_accessor.h b/runtime/contrib/labs/jniacl/src/io_accessor.h
new file mode 100644
index 000000000..bc4376644
--- /dev/null
+++ b/runtime/contrib/labs/jniacl/src/io_accessor.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IO_ACCESSOR_H__
+#define __IO_ACCESSOR_H__
+
+#include <arm_compute/graph/ITensorAccessor.h>
+
+class InputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ InputAccessor(bool inc) : _inc(inc) { _test_input = 1.0; }
+ InputAccessor(InputAccessor &&) = default;
+
+  // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor &tensor) override;
+
+private:
+ bool _inc;
+ float _test_input;
+};
+
+class OutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ OutputAccessor() = default;
+ OutputAccessor(OutputAccessor &&) = default;
+
+  // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor &tensor) override;
+};
+
+class WeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ WeightAccessor(bool inc) : _inc(inc) { _test_weight = 1.0; }
+ WeightAccessor(WeightAccessor &&) = default;
+
+  // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor &tensor) override;
+
+private:
+ bool _inc;
+ float _test_weight;
+};
+
+class BiasAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ BiasAccessor() = default;
+ BiasAccessor(BiasAccessor &&) = default;
+
+  // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor &tensor) override;
+};
+
+#endif // __IO_ACCESSOR_H__
diff --git a/runtime/contrib/labs/jniacl/src/jniacl_main.cc b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
new file mode 100644
index 000000000..4e5f10d1f
--- /dev/null
+++ b/runtime/contrib/labs/jniacl/src/jniacl_main.cc
@@ -0,0 +1,37 @@
+#include <jni.h>
+#include <string>
+
+#include <arm_compute/graph/Graph.h>
+#include <arm_compute/graph/Nodes.h>
+
+#include "io_accessor.h"
+
+extern "C" JNIEXPORT jstring JNICALL
+Java_com_samsung_testaclexec_ActivityMain_RunACLJNI(JNIEnv *env, jobject)
+{
+ using arm_compute::DataType;
+ using arm_compute::graph::Tensor;
+ using arm_compute::graph::TargetHint;
+ using arm_compute::graph::Graph;
+ using arm_compute::TensorInfo;
+ using arm_compute::TensorShape;
+
+ arm_compute::graph::Graph graph;
+ TargetHint target_hint = TargetHint::OPENCL;
+ bool autoinc = true;
+
+ graph << target_hint << Tensor(TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+ std::unique_ptr<InputAccessor>(new InputAccessor(autoinc)))
+ << arm_compute::graph::ConvolutionLayer(
+ 3U, 3U, 1U, std::unique_ptr<WeightAccessor>(new WeightAccessor(autoinc)),
+ std::unique_ptr<BiasAccessor>(new BiasAccessor()),
+ arm_compute::PadStrideInfo(1, 1, 0, 0))
+ << Tensor(std::unique_ptr<OutputAccessor>(new OutputAccessor()));
+
+ graph.run();
+
+  std::string hello = "Convolution Run OK"; // the graph above runs a single convolution
+
+ return env->NewStringUTF(hello.c_str());
+}
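The exported symbol name above follows the JNI naming convention, so it implies a Java caller of the form `com.samsung.testaclexec.ActivityMain` with a matching `native` declaration for `RunACLJNI` that loads `libjniacl_jni.so`; the Java counterpart is assumed here rather than shown in this diff.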
diff --git a/runtime/contrib/labs/opencl_test/CMakeLists.txt b/runtime/contrib/labs/opencl_test/CMakeLists.txt
new file mode 100644
index 000000000..40cb30bb9
--- /dev/null
+++ b/runtime/contrib/labs/opencl_test/CMakeLists.txt
@@ -0,0 +1,11 @@
+if(NOT ${TARGET_ARCH_BASE} STREQUAL "arm")
+ return()
+endif(NOT ${TARGET_ARCH_BASE} STREQUAL "arm")
+
+list(APPEND OPENCL_INFO_SOURCE "src/opencl_test.cc")
+
+nnas_find_package(ARMCompute REQUIRED)
+
+add_executable(opencl_test ${OPENCL_INFO_SOURCE})
+target_link_libraries(opencl_test arm_compute)
+target_link_libraries(opencl_test arm_compute_ex)
diff --git a/runtime/contrib/labs/opencl_test/README.md b/runtime/contrib/labs/opencl_test/README.md
new file mode 100644
index 000000000..950528f81
--- /dev/null
+++ b/runtime/contrib/labs/opencl_test/README.md
@@ -0,0 +1,8 @@
+This directory contains experiments with OpenCL code.
+
+How to run:
+```
+LD_LIBRARY_PATH=Product/out/lib Product/obj/contrib/opencl_test/opencl_test [option]
+```
+ - `[option]`
+   - `-g`: prints the devices in the GPU and checks whether they share the same memory address
+   - `-s`: tests synchronized work by two devices in a GPU
diff --git a/runtime/contrib/labs/opencl_test/src/opencl_test.cc b/runtime/contrib/labs/opencl_test/src/opencl_test.cc
new file mode 100644
index 000000000..1faa91478
--- /dev/null
+++ b/runtime/contrib/labs/opencl_test/src/opencl_test.cc
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*******************************************************************************
+ * Copyright (c) 2008-2015 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <iostream>
+#include <vector>
+
+void printDeviceInfo(int n, cl::Device &device, cl::Device &default_device)
+{
+ bool is_default = (device() == default_device());
+ std::cout << "\t\t\t#" << n << " Device: (id: " << device() << ") "
+ << (is_default ? " -> default" : "") << "\n";
+
+ const auto name = device.getInfo<CL_DEVICE_NAME>();
+ std::cout << "\t\t\t\tName: " << name << "\n";
+
+ const auto compute_unit = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
+ std::cout << "\t\t\t\tMax Compute Unit: " << compute_unit << "\n";
+
+ const auto max_work_item_size = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
+ std::cout << "\t\t\t\tMax Work Item Size: [";
+ for (auto size : max_work_item_size)
+ std::cout << size << ",";
+ std::cout << "]\n";
+
+ const auto max_work_group_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
+  std::cout << "\t\t\t\tMax Work Group Size: " << max_work_group_size << "\n";
+
+ const auto max_clock_frequency = device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>();
+ std::cout << "\t\t\t\tMax Clock Frequency: " << max_clock_frequency << "\n";
+
+ std::cout << "\n";
+}
+
+class OpenCLGpu
+{
+public:
+ cl::Platform platform_;
+ cl::Context context_;
+ cl::vector<cl::Device> devices_;
+ std::vector<cl::CommandQueue *> q_;
+ cl::Program program_;
+
+ OpenCLGpu()
+ {
+ cl_int cl_error;
+
+ platform_ = cl::Platform::getDefault();
+
+ try
+ {
+ cl_context_properties properties[3] = {CL_CONTEXT_PLATFORM,
+ (cl_context_properties)platform_(), 0};
+
+ context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &cl_error);
+ }
+ catch (cl::Error &err) // thrown when there is no Context for this platform
+ {
+ std::cout << "\t\t No Context Found\n";
+ return;
+ }
+
+ devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
+
+ for (int dev_id = 0; dev_id < devices_.size(); dev_id++)
+ {
+ cl::CommandQueue *que = new cl::CommandQueue(context_, devices_[dev_id]);
+ q_.emplace_back(que);
+ }
+ }
+
+ ~OpenCLGpu()
+ {
+ for (auto each_q : q_)
+ delete each_q;
+ }
+
+ void buildProgram(std::string &kernel_source_code)
+ {
+ std::vector<std::string> programStrings{kernel_source_code};
+
+ program_ = cl::Program(context_, programStrings);
+
+ try
+ {
+ program_.build("-cl-std=CL1.2");
+ }
+ catch (cl::Error &err)
+ {
+ cl_int buildErr = CL_SUCCESS;
+ auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr);
+ for (auto &pair : buildInfo)
+ {
+ std::cerr << pair.second << std::endl << std::endl;
+ }
+ }
+ }
+};
+
+void checkContextMem()
+{
+ cl_int cl_error;
+
+ // get context, devices
+ //
+  std::cout << "\nChecking if devices in the GPU share the same memory address:\n\n";
+
+ OpenCLGpu gpu;
+
+ std::cout << "\nDevices in GPU:\n\n";
+
+ auto &devices = gpu.devices_;
+ auto default_device = cl::Device::getDefault();
+
+ int d = 0;
+ for (auto device : devices)
+ printDeviceInfo(++d, device, default_device);
+
+ if (d < 2)
+ {
+    std::cout << "\t\t This option works only when there are two or more devices.\n";
+ return;
+ }
+
+ // allocate and map memory
+
+ typedef cl_int T;
+ const int items_per_device = 128;
+ const int length = items_per_device * devices.size();
+
+ std::vector<T> input(length);
+ std::vector<T> output(length, 0);
+
+ for (int i = 0; i < length; i++)
+ input[i] = i;
+
+ cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T),
+ input.data(), &cl_error);
+ cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T),
+ output.data(), &cl_error);
+
+ // compile test cl code
+
+ std::string kernel_source{"typedef int T; \n"
+ "kernel void memory_test( \n"
+ " const int dev_id, \n"
+ " global T* input, \n"
+ " global T* output, \n"
+ " const int start_idx, \n"
+ " const int count) \n"
+ "{ \n"
+ " int input_idx = get_global_id(0); \n"
+ " if(input_idx < count) \n"
+ " { \n"
+ " int output_idx = start_idx + input_idx; \n"
+ " output[output_idx] = input[input_idx] + dev_id; \n"
+ " } \n"
+ "} \n"};
+
+ gpu.buildProgram(kernel_source);
+
+ try
+ {
+ auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int>(
+ gpu.program_, "memory_test"); // name should be same as cl function name
+
+ // create a queue per device and queue a kernel job
+
+ for (int dev_id = 0; dev_id < devices.size(); dev_id++)
+ {
+ kernel_functor(cl::EnqueueArgs(*(gpu.q_[dev_id]), cl::NDRange(items_per_device)),
+ (cl_int)dev_id, // dev id
+ input_buf, output_buf,
+ (cl_int)(items_per_device * dev_id), // start index
+ (cl_int)(items_per_device), // count
+ cl_error);
+ }
+
+ // sync
+
+ for (d = 0; d < devices.size(); d++)
+ (gpu.q_[d])->finish();
+
+ // check if memory state changed by all devices
+
+ cl::copy(*(gpu.q_[0]), output_buf, begin(output), end(output));
+
+ bool use_same_memory = true;
+
+ for (int dev_id = 0; dev_id < devices.size(); dev_id++)
+ {
+ for (int i = 0; i < items_per_device; ++i)
+ {
+ int output_idx = items_per_device * dev_id + i;
+ if (output[output_idx] != input[i] + dev_id)
+ {
+ std::cout << "Output[" << output_idx << "] : "
+ << "expected = " << input[i] + dev_id << "; actual = " << output[output_idx]
+ << "\n";
+ use_same_memory = false;
+ break;
+ }
+ }
+ }
+
+ if (use_same_memory)
+ std::cout << "\n=> Mapped memory addresses used by devices in GPU are same.\n\n";
+ else
+ std::cout << "\n=> Mapped memory addresses used by devices in GPU are different.\n\n";
+ }
+ catch (cl::Error &err)
+ {
+ std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl;
+ }
+}
+
+void printHelp()
+{
+ std::cout << "opencl information: \n\n";
+ std::cout << "\t -h : help\n";
+ std::cout
+ << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n";
+ std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n";
+}
+
+#include <mutex>
+#include <chrono>
+#include <thread>
+#include <condition_variable>
+
+#define MAX_DEVICE_NUM 8 // just for testing
+
+int kernel_idx[MAX_DEVICE_NUM];
+unsigned char kernel_completed = 0x00; // bit i set = kernel on device[i] has completed
+unsigned char kernel_completed_flag;   // kernel_completed == this mask means all kernels are done
+int device_num;
+
+std::condition_variable wakeup_main;
+std::mutex wakeup_main_mutex;
+
+void notifyKernelFinished(cl_event ev, cl_int ev_info, void *device_idx)
+{
+ std::cout << "callback from device[" << *((int *)device_idx) << "] : ==> completed.\n";
+
+  // Update the completion mask under the same mutex the main thread waits on,
+  // so the notification cannot slip between the waiter's predicate check and
+  // its sleep.
+  std::unique_lock<std::mutex> lock(wakeup_main_mutex);
+
+ kernel_completed |= 0x01 << *((int *)device_idx);
+ if (kernel_completed == kernel_completed_flag)
+ wakeup_main.notify_one();
+}
+
+void testSync()
+{
+ OpenCLGpu gpu;
+
+ cl_int cl_error;
+ typedef cl_int T;
+ const int items_per_device = 1024 * 768;
+ const int length = items_per_device * gpu.devices_.size();
+
+ std::vector<T> output(length, 0);
+
+ cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length * sizeof(T),
+ output.data(), &cl_error);
+
+ std::string kernel_source{"kernel void test(global float* output, const int count) \n"
+ "{ \n"
+ " int idx = get_global_id(0); \n"
+ " if(idx < count) \n"
+ " { \n"
+ " float x = hypot(idx/1.111, idx*1.111); \n"
+ " for (int y = 0; y < 200; y++) \n"
+ " x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5); \n"
+ " output[idx] = x; \n"
+ " } \n"
+ "} \n"};
+
+ gpu.buildProgram(kernel_source);
+
+ try
+ {
+ auto kernel_functor = cl::KernelFunctor<cl::Buffer, cl_int>(
+ gpu.program_, "test"); // name should be same as cl function name
+
+ // variable init
+ cl::Event ev[MAX_DEVICE_NUM];
+
+ device_num = gpu.devices_.size();
+
+ kernel_completed = 0;
+ kernel_completed_flag = 0;
+ for (int i = 0; i < device_num; i++)
+ {
+ kernel_idx[i] = i;
+ kernel_completed_flag |= 0x01 << i;
+ }
+
+ // create a queue per device and queue a kernel job
+ // queueing with callback function
+ for (int dev_id = 0; dev_id < gpu.devices_.size(); dev_id++)
+ {
+ ev[dev_id] = kernel_functor(cl::EnqueueArgs(*(gpu.q_[dev_id]), cl::NDRange(items_per_device)),
+ output_buf,
+ (cl_int)(items_per_device), // count
+ cl_error);
+ ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void *)(kernel_idx + dev_id));
+
+ // how to check kernel execution status
+ //
+ // auto status = ev[dev_id].getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>();
+ // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status ==
+ // CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown")
+ // << std::endl;
+ // std::cout << "Event status code = " << status << std::endl;
+ }
+
+    // wait until all kernels have finished
+ {
+ std::unique_lock<std::mutex> lk(wakeup_main_mutex);
+ wakeup_main.wait(lk, [] { return (kernel_completed == kernel_completed_flag); });
+
+ std::cout << "all devices were completed.\n";
+ }
+ }
+ catch (cl::Error &err)
+ {
+ std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl;
+ }
+}
+
+int main(const int argc, char **argv)
+{
+ if (argc < 2)
+ printHelp();
+ else
+ {
+ std::string option = argv[1];
+
+ if (option == "-h") // help
+ printHelp();
+    else if (option == "-g") // check if devices in the GPU use the same memory address
+ checkContextMem();
+ else if (option == "-s") // check synchronization between devices in GPU
+ testSync();
+ }
+ return 0;
+}
diff --git a/runtime/contrib/labs/tflite_examples/CMakeLists.txt b/runtime/contrib/labs/tflite_examples/CMakeLists.txt
new file mode 100644
index 000000000..463bc5531
--- /dev/null
+++ b/runtime/contrib/labs/tflite_examples/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(tflite_conv_example "src/conv.cpp")
+target_link_libraries(tflite_conv_example tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
diff --git a/runtime/contrib/labs/tflite_examples/src/conv.cpp b/runtime/contrib/labs/tflite_examples/src/conv.cpp
new file mode 100644
index 000000000..3117c316c
--- /dev/null
+++ b/runtime/contrib/labs/tflite_examples/src/conv.cpp
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/builtin_op_data.h"
+
+#include <cassert>
+#include <iostream>
+#include <vector>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+namespace vector
+{
+
+template <typename T> struct View
+{
+ virtual ~View() = default;
+
+ virtual int32_t size(void) const = 0;
+ virtual T at(uint32_t off) const = 0;
+};
+} // namespace vector
+
+namespace feature
+{
+
+struct Shape
+{
+ int32_t C;
+ int32_t H;
+ int32_t W;
+};
+
+template <typename T> struct View
+{
+ virtual ~View() = default;
+
+ virtual const Shape &shape(void) const = 0;
+ virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0;
+};
+} // namespace feature
+
+namespace kernel
+{
+
+struct Shape
+{
+ int32_t N;
+ int32_t C;
+ int32_t H;
+ int32_t W;
+};
+
+template <typename T> struct View
+{
+ virtual ~View() = default;
+
+ virtual const Shape &shape(void) const = 0;
+ virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0;
+};
+} // namespace kernel
+
+const int32_t N = 1;
+const int32_t C = 2;
+
+class SampleBiasObject final : public vector::View<float>
+{
+public:
+ SampleBiasObject() : _size(N)
+ {
+ // DO NOTHING
+ }
+
+public:
+ int32_t size(void) const override { return _size; }
+
+ float at(uint32_t off) const override { return 0.0f; }
+
+private:
+ int32_t _size;
+};
+
+class SampleFeatureObject final : public feature::View<float>
+{
+public:
+ SampleFeatureObject()
+ {
+ _shape.C = C;
+ _shape.H = 3;
+ _shape.W = 4;
+
+ const uint32_t size = _shape.C * _shape.H * _shape.W;
+
+ for (uint32_t off = 0; off < size; ++off)
+ {
+ _value.emplace_back(off);
+ }
+
+ assert(_value.size() == size);
+ }
+
+public:
+ const feature::Shape &shape(void) const override { return _shape; };
+
+ float at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ return _value.at(ch * _shape.H * _shape.W + row * _shape.W + col);
+ }
+
+public:
+ float &at(uint32_t ch, uint32_t row, uint32_t col)
+ {
+ return _value.at(ch * _shape.H * _shape.W + row * _shape.W + col);
+ }
+
+private:
+ feature::Shape _shape;
+ std::vector<float> _value;
+};
+
+class SampleKernelObject final : public kernel::View<float>
+{
+public:
+ SampleKernelObject()
+ {
+ _shape.N = N;
+ _shape.C = C;
+ _shape.H = 3;
+ _shape.W = 4;
+
+ const uint32_t size = _shape.N * _shape.C * _shape.H * _shape.W;
+
+ for (uint32_t off = 0; off < size; ++off)
+ {
+ _value.emplace_back(off);
+ }
+
+ assert(_value.size() == size);
+ }
+
+public:
+ const kernel::Shape &shape(void) const override { return _shape; };
+
+ float at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ return _value.at(nth * _shape.C * _shape.H * _shape.W + ch * _shape.H * _shape.W +
+ row * _shape.W + col);
+ }
+
+private:
+ kernel::Shape _shape;
+ std::vector<float> _value;
+};
+
+int main(int argc, char **argv)
+{
+ const SampleFeatureObject ifm;
+ const SampleKernelObject kernel;
+ const SampleBiasObject bias;
+
+ const int32_t IFM_C = ifm.shape().C;
+ const int32_t IFM_H = ifm.shape().H;
+ const int32_t IFM_W = ifm.shape().W;
+
+ const int32_t KER_N = kernel.shape().N;
+ const int32_t KER_C = kernel.shape().C;
+ const int32_t KER_H = kernel.shape().H;
+ const int32_t KER_W = kernel.shape().W;
+
+ const int32_t OFM_C = kernel.shape().N;
+ const int32_t OFM_H = (IFM_H - KER_H) + 1;
+ const int32_t OFM_W = (IFM_W - KER_W) + 1;
+
+ // Assumption on this example
+ assert(IFM_C == KER_C);
+ assert(KER_N == bias.size());
+
+ // Comment from 'context.h'
+ //
+ // Parameters for asymmetric quantization. Quantized values can be converted
+ // back to float using:
+ // real_value = scale * (quantized_value - zero_point);
+ //
+ // Q: Is this necessary?
+ TfLiteQuantizationParams quantization;
+
+ quantization.scale = 1;
+ quantization.zero_point = 0;
+
+ Interpreter interp;
+
+  // On an AddTensors(N) call, the T/F Lite interpreter creates N tensors whose indices are in [0, N)
+ interp.AddTensors(5);
+
+ // Configure OFM
+ interp.SetTensorParametersReadWrite(0, kTfLiteFloat32 /* type */, "output" /* name */,
+ {1 /*N*/, OFM_H, OFM_W, OFM_C} /* dims */, quantization);
+
+ // Configure IFM
+ interp.SetTensorParametersReadWrite(1, kTfLiteFloat32 /* type */, "input" /* name */,
+ {1 /*N*/, IFM_H, IFM_W, IFM_C} /* dims */, quantization);
+
+ // Configure Filter
+ const uint32_t kernel_size = KER_N * KER_C * KER_H * KER_W;
+  // kernel_size is only known at run time, so a plain array here would be a
+  // non-standard VLA; use a zero-initialized vector instead
+  std::vector<float> kernel_data(kernel_size, 0.0f);
+
+ // Fill kernel data in NHWC order
+ {
+ uint32_t off = 0;
+
+ for (uint32_t nth = 0; nth < KER_N; ++nth)
+ {
+ for (uint32_t row = 0; row < KER_H; ++row)
+ {
+ for (uint32_t col = 0; col < KER_W; ++col)
+ {
+ for (uint32_t ch = 0; ch < KER_C; ++ch)
+ {
+ const auto value = kernel.at(nth, ch, row, col);
+ kernel_data[off++] = value;
+ }
+ }
+ }
+ }
+
+ assert(kernel_size == off);
+ }
+
+  interp.SetTensorParametersReadOnly(
+      2, kTfLiteFloat32 /* type */, "filter" /* name */, {KER_N, KER_H, KER_W, KER_C} /* dims */,
+      quantization, reinterpret_cast<const char *>(kernel_data.data()),
+      kernel_size * sizeof(float));
+
+ // Configure Bias
+ const uint32_t bias_size = bias.size();
+  std::vector<float> bias_data(bias_size, 0.0f);
+
+ // Fill bias data
+ for (uint32_t off = 0; off < bias.size(); ++off)
+ {
+ bias_data[off] = bias.at(off);
+ }
+
+  interp.SetTensorParametersReadOnly(3, kTfLiteFloat32 /* type */, "bias" /* name */,
+                                     {bias.size()} /* dims */, quantization,
+                                     reinterpret_cast<const char *>(bias_data.data()),
+                                     bias_size * sizeof(float));
+
+ // Add Convolution Node
+ //
+  // NOTE AddNodeWithParameters takes ownership of param and deallocates it with free,
+  //      so param should be allocated with malloc
+ TfLiteConvParams *param = reinterpret_cast<TfLiteConvParams *>(malloc(sizeof(TfLiteConvParams)));
+
+ param->padding = kTfLitePaddingValid;
+ param->stride_width = 1;
+ param->stride_height = 1;
+ param->activation = kTfLiteActRelu;
+
+ // Run Convolution and store its result into Tensor #0
+ // - Read IFM from Tensor #1
+ // - Read Filter from Tensor #2,
+ // - Read Bias from Tensor #3
+ interp.AddNodeWithParameters({1, 2, 3}, {0}, nullptr, 0, reinterpret_cast<void *>(param),
+ BuiltinOpResolver().FindOp(BuiltinOperator_CONV_2D, 1));
+
+ // Set Tensor #1 as Input #0, and Tensor #0 as Output #0
+ interp.SetInputs({1});
+ interp.SetOutputs({0});
+
+ // Let's use NNAPI (if possible)
+ interp.UseNNAPI(true);
+
+ // Allocate Tensor
+ interp.AllocateTensors();
+
+ // Fill IFM data in HWC order
+ {
+ uint32_t off = 0;
+
+ for (uint32_t row = 0; row < ifm.shape().H; ++row)
+ {
+ for (uint32_t col = 0; col < ifm.shape().W; ++col)
+ {
+ for (uint32_t ch = 0; ch < ifm.shape().C; ++ch)
+ {
+ const auto value = ifm.at(ch, row, col);
+ interp.typed_input_tensor<float>(0)[off++] = value;
+ }
+ }
+ }
+ }
+
+ // Let's Rock-n-Roll!
+ interp.Invoke();
+
+ // Print OFM
+ {
+ uint32_t off = 0;
+
+ for (uint32_t row = 0; row < OFM_H; ++row)
+ {
+ for (uint32_t col = 0; col < OFM_W; ++col)
+ {
+ for (uint32_t ch = 0; ch < kernel.shape().N; ++ch)
+ {
+ std::cout << interp.typed_output_tensor<float>(0)[off++] << std::endl;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
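A quick sanity check for the printed OFM: the feature map and the single 2x3x4 kernel both hold the values 0..23 in the same (C, H, W) order, and valid padding makes the kernel cover the feature map exactly once, so the lone output element should be the dot product sum(i^2) for i = 0..23, which is 23 * 24 * 47 / 6 = 4324 (the zero bias and the ReLU leave it unchanged), assuming the backend computes the convolution exactly.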
diff --git a/runtime/contrib/logging/CMakeLists.txt b/runtime/contrib/logging/CMakeLists.txt
new file mode 100644
index 000000000..b200bf89a
--- /dev/null
+++ b/runtime/contrib/logging/CMakeLists.txt
@@ -0,0 +1,12 @@
+if(NOT BUILD_LOGGING)
+ return()
+endif(NOT BUILD_LOGGING)
+
+file(GLOB_RECURSE NNAPI_LOGGING_SRCS "src/*.cc")
+
+nnas_find_package(Boost REQUIRED)
+
+add_library(neuralnetworks SHARED ${NNAPI_LOGGING_SRCS})
+target_include_directories(neuralnetworks PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(neuralnetworks PRIVATE ${Boost_INCLUDE_DIRS})
+target_link_libraries(neuralnetworks PUBLIC nnfw-nnapi-header)
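Note that the target is named `neuralnetworks`, so the build produces a `libneuralnetworks.so` with the same name as the real NNAPI runtime; presumably it is meant to be placed ahead of the genuine library on the loader path (for example via `LD_LIBRARY_PATH`) so that the logging stubs below intercept every NNAPI call an application makes.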
diff --git a/runtime/contrib/logging/include/operand.def b/runtime/contrib/logging/include/operand.def
new file mode 100644
index 000000000..c570cf026
--- /dev/null
+++ b/runtime/contrib/logging/include/operand.def
@@ -0,0 +1,12 @@
+// Extracted from tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+//
+// NNAPI_OPERAND(NAME, CODE)
+#ifndef NNAPI_OPERAND
+#error NNAPI_OPERAND should be defined
+#endif
+NNAPI_OPERAND(ANEURALNETWORKS_FLOAT32, 0)
+NNAPI_OPERAND(ANEURALNETWORKS_INT32, 1)
+NNAPI_OPERAND(ANEURALNETWORKS_UINT32, 2)
+NNAPI_OPERAND(ANEURALNETWORKS_TENSOR_FLOAT32, 3)
+NNAPI_OPERAND(ANEURALNETWORKS_TENSOR_INT32, 4)
+NNAPI_OPERAND(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, 5)
diff --git a/runtime/contrib/logging/include/operation.def b/runtime/contrib/logging/include/operation.def
new file mode 100644
index 000000000..cace360d7
--- /dev/null
+++ b/runtime/contrib/logging/include/operation.def
@@ -0,0 +1,15 @@
+// Extracted from tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+//
+// NNAPI_OPERATION(NAME, CODE)
+#ifndef NNAPI_OPERATION
+#error NNAPI_OPERATION should be defined
+#endif
+NNAPI_OPERATION(ANEURALNETWORKS_AVERAGE_POOL_2D, 1)
+NNAPI_OPERATION(ANEURALNETWORKS_CONCATENATION, 2)
+NNAPI_OPERATION(ANEURALNETWORKS_CONV_2D, 3)
+NNAPI_OPERATION(ANEURALNETWORKS_DEPTHWISE_CONV_2D, 4)
+NNAPI_OPERATION(ANEURALNETWORKS_FULLY_CONNECTED, 9)
+NNAPI_OPERATION(ANEURALNETWORKS_MAX_POOL_2D, 17)
+NNAPI_OPERATION(ANEURALNETWORKS_RESHAPE, 22)
+NNAPI_OPERATION(ANEURALNETWORKS_RESIZE_BILINEAR, 23)
+NNAPI_OPERATION(ANEURALNETWORKS_SOFTMAX, 25)
diff --git a/runtime/contrib/logging/src/nnapi_logging.cc b/runtime/contrib/logging/src/nnapi_logging.cc
new file mode 100644
index 000000000..14f2369ec
--- /dev/null
+++ b/runtime/contrib/logging/src/nnapi_logging.cc
@@ -0,0 +1,399 @@
+#include <NeuralNetworks.h>
+#include <NeuralNetworksEx.h>
+
+#include <stdexcept>
+#include <iostream>
+
+#include <string>
+#include <map>
+
+#include <cassert>
+
+#include <boost/format.hpp>
+
+namespace
+{
+
+class OperationCodeResolver
+{
+public:
+ OperationCodeResolver();
+
+public:
+ std::string resolve(int code) const;
+
+private:
+ void setName(int code, const std::string &name);
+
+private:
+ std::map<int, std::string> _table;
+
+public:
+ static const OperationCodeResolver &access()
+ {
+ static const OperationCodeResolver resolver;
+
+ return resolver;
+ }
+};
+
+OperationCodeResolver::OperationCodeResolver()
+{
+#define NNAPI_OPERATION(NAME, CODE) setName(CODE, #NAME);
+#include "operation.def"
+#undef NNAPI_OPERATION
+}
+
+void OperationCodeResolver::setName(int code, const std::string &name)
+{
+ assert(_table.find(code) == _table.end());
+ _table[code] = name;
+}
+
+std::string OperationCodeResolver::resolve(int code) const
+{
+ auto it = _table.find(code);
+
+ if (it == _table.end())
+ {
+ return boost::str(boost::format("unknown(%d)") % code);
+ }
+
+ return it->second;
+}
+
+class OperandCodeResolver
+{
+public:
+ OperandCodeResolver();
+
+public:
+ std::string resolve(int code) const;
+
+private:
+ void setName(int code, const std::string &name);
+
+private:
+ std::map<int, std::string> _table;
+
+public:
+ static const OperandCodeResolver &access()
+ {
+ static const OperandCodeResolver resolver;
+
+ return resolver;
+ }
+};
+
+OperandCodeResolver::OperandCodeResolver()
+{
+#define NNAPI_OPERAND(NAME, CODE) setName(CODE, #NAME);
+#include "operand.def"
+#undef NNAPI_OPERAND
+}
+
+void OperandCodeResolver::setName(int code, const std::string &name)
+{
+ assert(_table.find(code) == _table.end());
+ _table[code] = name;
+}
+
+std::string OperandCodeResolver::resolve(int code) const
+{
+ auto it = _table.find(code);
+
+ if (it == _table.end())
+ {
+ return boost::str(boost::format("unknown(%d)") % code);
+ }
+
+ return it->second;
+}
+} // namespace
+
+//
+// Asynchronous Event
+//
+struct ANeuralNetworksEvent
+{
+};
+
+int ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event) { return ANEURALNETWORKS_NO_ERROR; }
+
+void ANeuralNetworksEvent_free(ANeuralNetworksEvent *event) { delete event; }
+
+//
+// Memory
+//
+struct ANeuralNetworksMemory
+{
+ // 1st approach - Store all the data inside ANeuralNetworksMemory object
+ // 2nd approach - Store metadata only, and defer data loading as much as possible
+};
+
+int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
+ ANeuralNetworksMemory **memory)
+{
+ *memory = new ANeuralNetworksMemory;
+
+ std::cout << __FUNCTION__ << "() --> (memory: " << *memory << ")" << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory)
+{
+ std::cout << __FUNCTION__ << "(" << memory << ")" << std::endl;
+ delete memory;
+}
+
+//
+// Model
+//
+struct ANeuralNetworksModel
+{
+ // ANeuralNetworksModel should be a factory for Graph IR (a.k.a ISA Frontend)
+ // TODO Record # of operands
+ uint32_t numOperands;
+
+ ANeuralNetworksModel() : numOperands(0)
+ {
+ // DO NOTHING
+ }
+};
+
+int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
+{
+ *model = new ANeuralNetworksModel;
+
+ std::cout << __FUNCTION__ << "(" << model << ") --> (model: " << *model << ")" << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksModel_free(ANeuralNetworksModel *model)
+{
+ std::cout << __FUNCTION__ << "(" << model << ")" << std::endl;
+
+ delete model;
+}
+
+int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
+ const ANeuralNetworksOperandType *type)
+{
+ std::cout << __FUNCTION__ << "(model: " << model
+ << ", type: " << ::OperandCodeResolver::access().resolve(type->type) << ")"
+ << std::endl;
+
+ auto id = model->numOperands;
+
+ std::cout << " id: " << id << std::endl;
+ std::cout << " rank: " << type->dimensionCount << std::endl;
+ for (uint32_t dim = 0; dim < type->dimensionCount; ++dim)
+ {
+ std::cout << " dim(" << dim << "): " << type->dimensions[dim] << std::endl;
+ }
+
+ model->numOperands += 1;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
+ const void *buffer, size_t length)
+{
+ std::cout << __FUNCTION__ << "(model: " << model << ", index: " << index << ")" << std::endl;
+
+ // TODO Implement this!
+ // NOTE buffer becomes invalid after ANeuralNetworksModel_setOperandValue returns
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksMemory *memory,
+ size_t offset, size_t length)
+{
+ std::cout << __FUNCTION__ << "(model: " << model << ", index: " << index << ")" << std::endl;
+
+ // TODO Implement this!
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ std::cout << __FUNCTION__ << "(model: " << model
+ << ", type: " << ::OperationCodeResolver::access().resolve(type)
+ << ", inputCount: " << inputCount << ", outputCount: " << outputCount << ")"
+ << std::endl;
+
+ for (uint32_t input = 0; input < inputCount; ++input)
+ {
+ std::cout << " input(" << input << "): " << inputs[input] << std::endl;
+ }
+ for (uint32_t output = 0; output < outputCount; ++output)
+ {
+ std::cout << " output(" << output << "): " << outputs[output] << std::endl;
+ }
+
+ // TODO Implement this!
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ std::cout << __FUNCTION__ << "(model: " << model << ", type: " << type
+ << ", inputCount: " << inputCount << ", outputCount: " << outputCount << ")"
+ << std::endl;
+
+ for (uint32_t input = 0; input < inputCount; ++input)
+ {
+ std::cout << " input(" << input << "): " << inputs[input] << std::endl;
+ }
+ for (uint32_t output = 0; output < outputCount; ++output)
+ {
+ std::cout << " output(" << output << "): " << outputs[output] << std::endl;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ std::cout << __FUNCTION__ << "(model: " << model << ")" << std::endl;
+
+ for (uint32_t input = 0; input < inputCount; ++input)
+ {
+ std::cout << " input(" << input << "): " << inputs[input] << std::endl;
+ }
+ for (uint32_t output = 0; output < outputCount; ++output)
+ {
+ std::cout << " output(" << output << "): " << outputs[output] << std::endl;
+ }
+
+ // TODO Implement this!
+ // NOTE It seems that this function identifies the input and output of the whole model
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
+{
+ std::cout << __FUNCTION__ << "(model: " << model << ")" << std::endl;
+
+ // TODO Implement this!
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+//
+// Compilation
+//
+struct ANeuralNetworksCompilation
+{
+ // ANeuralNetworksCompilation should hold a compiled IR
+};
+
+int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
+ ANeuralNetworksCompilation **compilation)
+{
+ *compilation = new ANeuralNetworksCompilation;
+
+ std::cout << __FUNCTION__ << "(model: " << model << ") --> (compilation: " << *compilation << ")"
+ << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
+{
+ std::cout << __FUNCTION__ << "(compilation: " << compilation << ")" << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+//
+// Execution
+//
+struct ANeuralNetworksExecution
+{
+ // ANeuralNetworksExecution corresponds to NPU::Interp::Session
+};
+
+int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksExecution **execution)
+{
+ *execution = new ANeuralNetworksExecution;
+
+ std::cout << __FUNCTION__ << "(compilation: " << compilation << ") --> (execution: " << *execution
+ << ")" << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+// ANeuralNetworksExecution_setInput and ANeuralNetworksExecution_setOutput specify HOST buffer for
+// input/output
+int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, const void *buffer,
+ size_t length)
+{
+ std::cout << __FUNCTION__ << "(execution: " << execution << ", type: ";
+
+ if (type == nullptr)
+ std::cout << "nullptr)" << std::endl;
+ else
+ std::cout << ::OperandCodeResolver::access().resolve(type->type) << ")" << std::endl;
+
+ // Q: Should we transfer input from HOST to DEVICE here, or in
+ // ANeuralNetworksExecution_startCompute?
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, void *buffer,
+ size_t length)
+{
+ std::cout << __FUNCTION__ << "(execution: " << execution << ", type: ";
+
+ if (type == nullptr)
+ std::cout << "nullptr)" << std::endl;
+ else
+ std::cout << ::OperandCodeResolver::access().resolve(type->type) << ")" << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
+ ANeuralNetworksEvent **event)
+{
+ *event = new ANeuralNetworksEvent;
+
+ std::cout << __FUNCTION__ << "(execution: " << execution << ") --> (event: " << *event << ")"
+ << std::endl;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution)
+{
+ std::cout << __FUNCTION__ << "(execution: " << execution << ")" << std::endl;
+
+ delete execution;
+}
+
+void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
+{
+ std::cout << __FUNCTION__ << "(compilation: " << compilation << ")" << std::endl;
+ delete compilation;
+}
diff --git a/runtime/contrib/mlapse/CMakeLists.txt b/runtime/contrib/mlapse/CMakeLists.txt
new file mode 100644
index 000000000..bba79971a
--- /dev/null
+++ b/runtime/contrib/mlapse/CMakeLists.txt
@@ -0,0 +1,8 @@
+if(NOT BUILD_MLAPSE)
+ return()
+endif(NOT BUILD_MLAPSE)
+
+message(STATUS "Build mlapse: TRUE")
+
+# TODO Add "core"
+add_subdirectory(tfl)
diff --git a/runtime/contrib/mlapse/README.md b/runtime/contrib/mlapse/README.md
new file mode 100644
index 000000000..36f14ac39
--- /dev/null
+++ b/runtime/contrib/mlapse/README.md
@@ -0,0 +1,3 @@
+# mlapse
+
+_mlapse_ is a toolkit for benchmarking model inference latency.
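+
+A minimal usage sketch, assuming `mlapse-tfl` has been built and installed (the model path and flag values below are placeholders; see the driver source for the full option list):
+
+```
+$ mlapse-tfl --model model.tflite --record-count 10 --csv-report report.csv
+```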
diff --git a/runtime/contrib/mlapse/tfl/CMakeLists.txt b/runtime/contrib/mlapse/tfl/CMakeLists.txt
new file mode 100644
index 000000000..36f32d7ef
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/CMakeLists.txt
@@ -0,0 +1,12 @@
+message(STATUS "Build mlapse-tfl: TRUE")
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_executable(mlapse-tfl ${SOURCES})
+target_include_directories(mlapse-tfl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(mlapse-tfl nnfw_lib_tflite)
+target_link_libraries(mlapse-tfl nnfw_lib_misc)
+target_link_libraries(mlapse-tfl nnfw_lib_cpp14)
+target_link_libraries(mlapse-tfl tensorflow-lite)
+
+install(TARGETS mlapse-tfl DESTINATION bin)
diff --git a/runtime/contrib/mlapse/tfl/driver.cc b/runtime/contrib/mlapse/tfl/driver.cc
new file mode 100644
index 000000000..867a6051a
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/driver.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mlapse/benchmark_runner.h"
+#include "mlapse/multicast_observer.h"
+#include "mlapse/CSV_report_generator.h"
+
+#include "mlapse/tfl/load.h"
+
+// From 'nnfw_lib_tflite'
+#include <tflite/InterpreterSession.h>
+#include <tflite/NNAPISession.h>
+
+// From 'nnfw_lib_cpp14'
+#include <cpp14/memory.h>
+
+// From C++ Standard Library
+#include <cassert>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+using namespace mlapse;
+
+class ConsoleReporter final : public mlapse::BenchmarkObserver
+{
+public:
+ ConsoleReporter() = default;
+
+public:
+ void notify(const NotificationArg<PhaseBegin> &arg) final
+ {
+ _phase = arg.phase;
+ _count = arg.count;
+
+ std::cout << tag() << " BEGIN" << std::endl;
+ }
+
+ void notify(const NotificationArg<PhaseEnd> &arg) final
+ {
+ std::cout << tag() << " END" << std::endl;
+
+ _phase = mlapse::uninitialized_phase();
+ _count = 0;
+ }
+
+ void notify(const NotificationArg<IterationBegin> &arg) final { _index = arg.index; }
+
+ void notify(const NotificationArg<IterationEnd> &arg) final
+ {
+ std::cout << tag() << " " << progress() << " - " << arg.latency.count() << "ms" << std::endl;
+ }
+
+private:
+ std::string progress(void) const
+ {
+ return "[" + std::to_string(_index + 1) + "/" + std::to_string(_count) + "]";
+ }
+
+ std::string tag(void) const
+ {
+ switch (_phase)
+ {
+ case Phase::Warmup:
+ return "WARMUP";
+ case Phase::Record:
+ return "RECORD";
+ default:
+ break;
+ }
+
+ return "unknown";
+ }
+
+ Phase _phase = mlapse::uninitialized_phase();
+ uint32_t _count = 0;
+ uint32_t _index = 0;
+};
+
+} // namespace
+
+// Q. Is it worth making a library out of these routines?
+namespace
+{
+
+enum class SessionType
+{
+ Interp,
+ NNAPI,
+};
+
+class SessionBuilder
+{
+public:
+ SessionBuilder(const SessionType &type) : _type{type}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::unique_ptr<nnfw::tflite::Session> with(tflite::Interpreter *interp) const
+ {
+ switch (_type)
+ {
+ case SessionType::Interp:
+ return nnfw::cpp14::make_unique<nnfw::tflite::InterpreterSession>(interp);
+ case SessionType::NNAPI:
+ return nnfw::cpp14::make_unique<nnfw::tflite::NNAPISession>(interp);
+ default:
+ break;
+ }
+
+ return nullptr;
+ }
+
+ std::unique_ptr<nnfw::tflite::Session>
+ with(const std::unique_ptr<tflite::Interpreter> &interp) const
+ {
+ return with(interp.get());
+ }
+
+private:
+ SessionType _type;
+};
+
+SessionBuilder make_session(const SessionType &type) { return SessionBuilder{type}; }
+
+} // namespace
+
+namespace
+{
+
+// mlapse-tfl
+// [REQUIRED] --model <path/to/tflite>
+// [OPTIONAL] --warmup-count N (default = 3)
+// [OPTIONAL] --record-count N (default = 10)
+// [OPTIONAL] --thread N or auto (default = auto; only "auto" is accepted for now)
+// [OPTIONAL] --nnapi (default = off)
+// [OPTIONAL] --pause N (default = 0; not handled yet)
+// [OPTIONAL] --csv-report <path/to/csv>
+int entry(const int argc, char **argv)
+{
+ // Create an observer
+ mlapse::MulticastObserver observer;
+
+ observer.append(nnfw::cpp14::make_unique<ConsoleReporter>());
+
+ // Set default parameters
+ std::string model_path;
+ bool model_path_initialized = false;
+
+ SessionType session_type = SessionType::Interp;
+ uint32_t warmup_count = 3;
+ uint32_t record_count = 10;
+ int num_thread = -1; // -1 means "auto"
+
+ // Read command-line arguments
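+  // Each option maps to a handler that consumes the option's extra tokens and
+  // returns how many command-line arguments it consumed.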
+ std::map<std::string, std::function<uint32_t(const char *const *)>> opts;
+
+ opts["--model"] = [&model_path, &model_path_initialized](const char *const *tok) {
+ model_path = std::string{tok[0]};
+ model_path_initialized = true;
+ return 1; // # of arguments
+ };
+
+ opts["--record-count"] = [&record_count](const char *const *tok) {
+ record_count = std::stoi(tok[0]);
+ return 1; // # of arguments
+ };
+
+ opts["--thread"] = [](const char *const *tok) {
+ assert(std::string{tok[0]} == "auto");
+ return 1;
+ };
+
+ opts["--nnapi"] = [&session_type](const char *const *) {
+ session_type = SessionType::NNAPI;
+ return 0;
+ };
+
+ opts["--csv-report"] = [&observer](const char *const *tok) {
+ observer.append(nnfw::cpp14::make_unique<mlapse::CSVReportGenerator>(tok[0]));
+ return 1;
+ };
+
+ {
+ uint32_t offset = 1;
+
+    while (offset < static_cast<uint32_t>(argc))
+ {
+ auto opt = argv[offset];
+
+ auto it = opts.find(opt);
+
+ if (it == opts.end())
+ {
+ std::cout << "INVALID OPTION: " << opt << std::endl;
+ return 255;
+ }
+
+ auto func = it->second;
+
+ auto num_skip = func(argv + offset + 1);
+
+ offset += 1;
+ offset += num_skip;
+ }
+ }
+
+ // Check arguments
+ if (!model_path_initialized)
+ {
+ std::cerr << "ERROR: --model is missing" << std::endl;
+ return 255;
+ }
+
+ // Load T/F Lite model
+ auto model = mlapse::tfl::load_model(model_path);
+
+ if (model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << model_path << "'" << std::endl;
+ return 255;
+ }
+
+ auto interp = mlapse::tfl::make_interpreter(model.get());
+
+ if (interp == nullptr)
+ {
+ std::cerr << "ERROR: Failed to create a T/F Lite interpreter" << std::endl;
+ return 255;
+ }
+
+ auto sess = make_session(session_type).with(interp);
+
+  if (sess == nullptr)
+  {
+    std::cerr << "ERROR: Failed to create a session" << std::endl;
+    return 255;
+  }
+
+ // Run benchmark
+ mlapse::BenchmarkRunner benchmark_runner{warmup_count, record_count};
+
+ benchmark_runner.attach(&observer);
+ benchmark_runner.run(sess);
+
+ return 0;
+}
+
+} // namespace
+
+int main(int argc, char **argv)
+{
+ try
+ {
+ return entry(argc, argv);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ }
+
+ return 255;
+}
diff --git a/runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.cc b/runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.cc
new file mode 100644
index 000000000..c6237a04f
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mlapse/CSV_report_generator.h"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+std::string tag(const mlapse::Phase &phase)
+{
+ switch (phase)
+ {
+ case mlapse::Phase::Warmup:
+ return "WARMUP";
+ case mlapse::Phase::Record:
+ return "STEADY";
+ default:
+ break;
+ }
+
+ throw std::invalid_argument{"phase"};
+}
+
+} // namespace
+
+namespace mlapse
+{
+
+void CSVReportGenerator::notify(const NotificationArg<PhaseBegin> &arg)
+{
+ assert(_phase == uninitialized_phase());
+ _phase = arg.phase;
+}
+
+void CSVReportGenerator::notify(const NotificationArg<PhaseEnd> &arg)
+{
+ assert(_phase != uninitialized_phase());
+ _phase = uninitialized_phase();
+}
+
+void CSVReportGenerator::notify(const NotificationArg<IterationBegin> &arg)
+{
+ // DO NOTHING
+}
+
+void CSVReportGenerator::notify(const NotificationArg<IterationEnd> &arg)
+{
+ _ofs << tag(_phase) << "," << arg.latency.count() << std::endl;
+}
+
+} // namespace mlapse
diff --git a/runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.h b/runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.h
new file mode 100644
index 000000000..8842baf8e
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/CSV_report_generator.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MLAPSE_CSV_REPORT_GENERATOR_H__
+#define __MLAPSE_CSV_REPORT_GENERATOR_H__
+
+#include "mlapse/benchmark_observer.h"
+
+#include <fstream>
+#include <string>
+
+namespace mlapse
+{
+
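+// Appends one CSV row per iteration in the form "<PHASE>,<latency in ms>", e.g.
+//
+//   WARMUP,12
+//   STEADY,9
+//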
+class CSVReportGenerator final : public BenchmarkObserver
+{
+public:
+ CSVReportGenerator(const std::string &path) : _ofs{path, std::ofstream::out}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void notify(const NotificationArg<PhaseBegin> &arg) final;
+ void notify(const NotificationArg<PhaseEnd> &arg) final;
+ void notify(const NotificationArg<IterationBegin> &arg) final;
+  void notify(const NotificationArg<IterationEnd> &arg) final;
+
+private:
+ std::ofstream _ofs;
+
+ Phase _phase = uninitialized_phase();
+};
+
+} // namespace mlapse
+
+#endif // __MLAPSE_CSV_REPORT_GENERATOR_H__
diff --git a/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.cc b/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.cc
new file mode 100644
index 000000000..f6d596a7b
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mlapse/benchmark_observer.h"
+
+namespace mlapse
+{
+
+Phase uninitialized_phase(void) { return static_cast<Phase>(0); }
+
+} // namespace mlapse
diff --git a/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h b/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h
new file mode 100644
index 000000000..8fc570d24
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/benchmark_observer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MLAPSE_BENCHMARK_OBSERVER_H__
+#define __MLAPSE_BENCHMARK_OBSERVER_H__
+
+#include <cstdint>
+#include <chrono>
+
+namespace mlapse
+{
+
+enum Phase : int32_t
+{
+ // 0 denotes "uninitialized value"
+ Warmup = 1,
+ Record = 2,
+};
+
+Phase uninitialized_phase(void);
+
+enum Notification
+{
+ PhaseBegin,
+ PhaseEnd,
+ IterationBegin,
+ IterationEnd,
+};
+
+template <Notification N> struct NotificationArg;
+
+template <> struct NotificationArg<PhaseBegin>
+{
+ Phase phase;
+ uint32_t count;
+};
+
+template <> struct NotificationArg<PhaseEnd>
+{
+};
+
+template <> struct NotificationArg<IterationBegin>
+{
+ uint32_t index;
+};
+
+template <> struct NotificationArg<IterationEnd>
+{
+ std::chrono::milliseconds latency;
+};
+
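+// A BenchmarkObserver receives one PhaseBegin/PhaseEnd pair per phase, and one
+// IterationBegin/IterationEnd pair per iteration within that phase.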
+struct BenchmarkObserver
+{
+ virtual ~BenchmarkObserver() = default;
+
+ virtual void notify(const NotificationArg<PhaseBegin> &arg) = 0;
+ virtual void notify(const NotificationArg<PhaseEnd> &arg) = 0;
+ virtual void notify(const NotificationArg<IterationBegin> &arg) = 0;
+ virtual void notify(const NotificationArg<IterationEnd> &arg) = 0;
+};
+
+} // namespace mlapse
+
+#endif // __MLAPSE_BENCHMARK_OBSERVER_H__
diff --git a/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.cc b/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.cc
new file mode 100644
index 000000000..f5fc7302d
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mlapse/benchmark_runner.h"
+
+// From 'nnfw_lib_misc'
+#include <misc/benchmark.h>
+
+// From C++ Standard Library
+#include <cassert>
+
+namespace mlapse
+{
+void BenchmarkRunner::attach(BenchmarkObserver *observer)
+{
+ assert(_observer == nullptr);
+ _observer = observer;
+}
+
+void BenchmarkRunner::run(nnfw::tflite::Session *sess) const
+{
+ for (auto phase : {Warmup, Record})
+ {
+ uint32_t const count = _count.at(phase);
+
+ // Notify when each phase begins
+ {
+ NotificationArg<PhaseBegin> arg;
+
+ arg.phase = phase;
+ arg.count = count;
+
+ notify(arg);
+ }
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ std::chrono::milliseconds elapsed(0);
+
+ sess->prepare();
+
+ // Notify when each iteration begins
+ {
+ NotificationArg<IterationBegin> arg;
+
+ arg.index = n;
+
+ notify(arg);
+      }
+
+ nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+ if (!sess->run())
+ {
+ throw std::runtime_error{"run failed"};
+ }
+ };
+
+ // Notify when each iteration ends
+ {
+ NotificationArg<IterationEnd> arg;
+
+ arg.latency = elapsed;
+
+ notify(arg);
+      }
+
+ sess->teardown();
+ }
+
+ // Notify when each phase ends
+ {
+ NotificationArg<PhaseEnd> arg;
+
+ notify(arg);
+ }
+ }
+}
+
+void BenchmarkRunner::notify(const NotificationArg<PhaseBegin> &arg) const
+{
+ if (_observer)
+ {
+ _observer->notify(arg);
+ }
+}
+
+void BenchmarkRunner::notify(const NotificationArg<PhaseEnd> &arg) const
+{
+ if (_observer)
+ {
+ _observer->notify(arg);
+ }
+}
+
+void BenchmarkRunner::notify(const NotificationArg<IterationBegin> &arg) const
+{
+ if (_observer)
+ {
+ _observer->notify(arg);
+ }
+}
+
+void BenchmarkRunner::notify(const NotificationArg<IterationEnd> &arg) const
+{
+ if (_observer)
+ {
+ _observer->notify(arg);
+ }
+}
+
+} // namespace mlapse
diff --git a/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h b/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h
new file mode 100644
index 000000000..fcbb41d1b
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/benchmark_runner.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MLAPSE_BENCHMARK_RUNNER_H__
+#define __MLAPSE_BENCHMARK_RUNNER_H__
+
+#include "mlapse/benchmark_observer.h"
+
+#include <tflite/Session.h>
+
+#include <chrono>
+#include <map>
+#include <memory>
+
+namespace mlapse
+{
+
+class BenchmarkRunner final
+{
+public:
+ BenchmarkRunner(uint32_t warmup_count, uint32_t record_count)
+ {
+ _count[Warmup] = warmup_count;
+ _count[Record] = record_count;
+ }
+
+public:
+ void attach(BenchmarkObserver *observer);
+
+public:
+ void run(nnfw::tflite::Session *sess) const;
+
+public:
+ void run(const std::unique_ptr<nnfw::tflite::Session> &sess) const { run(sess.get()); }
+
+private:
+ void notify(const NotificationArg<PhaseBegin> &arg) const;
+ void notify(const NotificationArg<PhaseEnd> &arg) const;
+ void notify(const NotificationArg<IterationBegin> &arg) const;
+ void notify(const NotificationArg<IterationEnd> &arg) const;
+
+private:
+ std::map<Phase, uint32_t> _count;
+
+private:
+ BenchmarkObserver *_observer = nullptr;
+};
+
+} // namespace mlapse
+
+#endif // __MLAPSE_BENCHMARK_RUNNER_H__
diff --git a/runtime/contrib/mlapse/tfl/mlapse/multicast_observer.cc b/runtime/contrib/mlapse/tfl/mlapse/multicast_observer.cc
new file mode 100644
index 000000000..639acfe45
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/multicast_observer.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mlapse/multicast_observer.h"
diff --git a/runtime/contrib/mlapse/tfl/mlapse/multicast_observer.h b/runtime/contrib/mlapse/tfl/mlapse/multicast_observer.h
new file mode 100644
index 000000000..e4aac50a9
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/multicast_observer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MLAPSE_MULTICAST_OBSERVER_H__
+#define __MLAPSE_MULTICAST_OBSERVER_H__
+
+#include "mlapse/benchmark_observer.h"
+
+#include <memory>
+#include <vector>
+
+namespace mlapse
+{
+
+class MulticastObserver final : public BenchmarkObserver
+{
+public:
+ MulticastObserver() = default;
+
+public:
+ void append(std::unique_ptr<BenchmarkObserver> &&o) { _observers.emplace_back(std::move(o)); }
+
+public:
+ void notify(const NotificationArg<PhaseBegin> &arg) final
+ {
+ for (const auto &o : _observers)
+ {
+ o->notify(arg);
+ }
+ }
+
+ void notify(const NotificationArg<PhaseEnd> &arg) final
+ {
+ for (const auto &o : _observers)
+ {
+ o->notify(arg);
+ }
+ }
+
+ void notify(const NotificationArg<IterationBegin> &arg) final
+ {
+ for (const auto &o : _observers)
+ {
+ o->notify(arg);
+ }
+ }
+
+ void notify(const NotificationArg<IterationEnd> &arg) final
+ {
+ for (const auto &o : _observers)
+ {
+ o->notify(arg);
+ }
+ }
+
+private:
+ std::vector<std::unique_ptr<BenchmarkObserver>> _observers;
+};
+
+} // namespace mlapse
+
+#endif // __MLAPSE_MULTICAST_OBSERVER_H__
diff --git a/runtime/contrib/mlapse/tfl/mlapse/tfl/load.cc b/runtime/contrib/mlapse/tfl/mlapse/tfl/load.cc
new file mode 100644
index 000000000..9e770aecf
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/tfl/load.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mlapse/tfl/load.h"
+
+#include <tflite/ext/kernels/register.h>
+
+namespace
+{
+
+tflite::StderrReporter error_reporter;
+
+} // namespace
+
+namespace mlapse
+{
+namespace tfl
+{
+
+std::unique_ptr<tflite::FlatBufferModel> load_model(const std::string &path)
+{
+ return tflite::FlatBufferModel::BuildFromFile(path.c_str(), &error_reporter);
+}
+
+std::unique_ptr<tflite::Interpreter> make_interpreter(const tflite::FlatBufferModel *model)
+{
+ // Let's use extended resolver!
+ nnfw::tflite::BuiltinOpResolver resolver;
+ tflite::InterpreterBuilder builder(*model, resolver);
+
+ std::unique_ptr<tflite::Interpreter> interpreter;
+
+ if (builder(&interpreter) != kTfLiteOk)
+ {
+ return nullptr;
+ }
+
+  return interpreter;
+}
+
+} // namespace tfl
+} // namespace mlapse
diff --git a/runtime/contrib/mlapse/tfl/mlapse/tfl/load.h b/runtime/contrib/mlapse/tfl/mlapse/tfl/load.h
new file mode 100644
index 000000000..6f5a8f1ea
--- /dev/null
+++ b/runtime/contrib/mlapse/tfl/mlapse/tfl/load.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MLAPSE_TFL_LOAD_H__
+#define __MLAPSE_TFL_LOAD_H__
+
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/model.h>
+
+#include <memory>
+
+namespace mlapse
+{
+namespace tfl
+{
+
+std::unique_ptr<tflite::FlatBufferModel> load_model(const std::string &path);
+
+// WARNING
+//
+// model SHOULD outlive Interpreter
+std::unique_ptr<tflite::Interpreter> make_interpreter(const tflite::FlatBufferModel *model);
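+
+// A minimal usage sketch (error checks omitted; the file name is a placeholder).
+// The interpreter must be destroyed before the model it was built from:
+//
+//   auto model = mlapse::tfl::load_model("net.tflite");
+//   auto interp = mlapse::tfl::make_interpreter(model.get());
+//   // ... run 'interp', then let it go out of scope before 'model'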
+
+} // namespace tfl
+} // namespace mlapse
+
+#endif // __MLAPSE_TFL_LOAD_H__
diff --git a/runtime/contrib/pure_arm_compute/CMakeLists.txt b/runtime/contrib/pure_arm_compute/CMakeLists.txt
new file mode 100644
index 000000000..f9cdfcba9
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/CMakeLists.txt
@@ -0,0 +1,29 @@
+if(NOT BUILD_PURE_ARM_COMPUTE)
+ return()
+endif(NOT BUILD_PURE_ARM_COMPUTE)
+
+nnas_find_package(ARMCompute REQUIRED)
+
+file(GLOB_RECURSE SOURCES "src/*.cc")
+
+add_library(nnapi_pure_arm_compute SHARED ${SOURCES})
+# To ignore compiler warnings from ARM Compute Library
+target_compile_options(nnapi_pure_arm_compute PRIVATE -Wno-ignored-attributes)
+target_include_directories(nnapi_pure_arm_compute PUBLIC src)
+if(BUILD_TFLITE_BENCHMARK_MODEL)
+ target_compile_definitions(nnapi_pure_arm_compute PUBLIC "TFLITE_PROFILING_ENABLED")
+endif()
+target_link_libraries(nnapi_pure_arm_compute nnfw-nnapi-header)
+target_link_libraries(nnapi_pure_arm_compute arm_compute arm_compute_ex)
+target_link_libraries(nnapi_pure_arm_compute nnfw_lib_cpp14 nnfw_lib_misc nnfw_lib_profiling)
+
+if(ANDROID)
+ target_link_libraries(nnapi_pure_arm_compute log)
+endif()
+
+set_target_properties(nnapi_pure_arm_compute PROPERTIES OUTPUT_NAME neuralnetworks)
+install(TARGETS nnapi_pure_arm_compute DESTINATION lib/pureacl)
+
+# To prevent undefined references
+add_executable(pure_arm_compute_symbolcheck symbolcheck.cpp)
+target_link_libraries(pure_arm_compute_symbolcheck nnapi_pure_arm_compute)
diff --git a/runtime/contrib/pure_arm_compute/src/compilation.cc b/runtime/contrib/pure_arm_compute/src/compilation.cc
new file mode 100644
index 000000000..8cc86ebae
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/compilation.cc
@@ -0,0 +1,6434 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file compilation.cc
+ * @brief This file contains ANeuralNetworksCompilation APIs and related classes
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#include <NeuralNetworks.h>
+
+// For CLKernelLibraryEx initialization
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
+#include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
+
+#include <arm_compute/runtime/SubTensor.h>
+#include <arm_compute/runtime/NEON/NEFunctions.h> // Include all ARM Compute NEON functions
+#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+#include "internal/arm_compute/matrix/View.h"
+#include "internal/arm_compute/kernel/View.h"
+#include "internal/nnapi/matrix/Reader.h"
+#include "internal/nnapi/kernel/Reader.h"
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+#include "internal/nnapi/tensor/Reader.h"
+#include "internal/arm_compute/feature/View.h"
+#include "internal/arm_compute/tensor/View.h"
+
+#include <arm_compute/runtime/misc/functions/GenericReshapeLayer.h>
+#include <arm_compute/runtime/misc/functions/GenericGather.h>
+
+#include "misc/matrix/IndexIterator.h"
+#include "misc/kernel/IndexIterator.h"
+#include "misc/feature/IndexIterator.h"
+#include "misc/tensor/IndexIterator.h"
+
+#include <cpp14/memory.h>
+
+#include <algorithm>  // std::max
+#include <cassert>    // assert
+#include <functional> // std::bind, std::placeholders
+#include <stdexcept>  // std::runtime_error
+#include <string>     // std::stoi
+
+#include "compilation.h"
+#include "model.h"
+#include "logging.h"
+
+using namespace arm_compute::misc;
+
+template <typename T> T from_env(const char *);
+
+template <> bool from_env(const char *s)
+{
+ if (s == nullptr)
+ {
+ return false;
+ }
+
+ return std::stoi(s) != 0;
+}
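+
+// Usage sketch (the variable name is illustrative):
+//   from_env<bool>(std::getenv("SOME_FLAG")) is true iff SOME_FLAG is set to a
+//   non-zero integer.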
+
+const char *to_string(const PaddingCode &code)
+{
+ assert((ANEURALNETWORKS_PADDING_SAME == code) || (ANEURALNETWORKS_PADDING_VALID == code));
+
+ switch (code)
+ {
+ case ANEURALNETWORKS_PADDING_SAME:
+ return "ANEURALNETWORKS_PADDING_SAME";
+ case ANEURALNETWORKS_PADDING_VALID:
+ return "ANEURALNETWORKS_PADDING_VALID";
+ }
+
+ return nullptr;
+}
+
+struct Padding
+{
+ uint32_t top;
+ uint32_t bottom;
+ uint32_t left;
+ uint32_t right;
+};
+
+struct Stride
+{
+ uint32_t vertical;
+ uint32_t horizontal;
+};
+
+Padding valid_padding(void)
+{
+ //
+ // ANEURALNETWORKS_PADDING_VALID
+ //
+ // VALID padding. No padding.
+ //
+ // When the input size is not evenly divisible by the filter size,
+ // the input at the end that could not fill the whole filter tile
+ // will simply be ignored.
+ //
+ Padding padding;
+
+ padding.top = 0;
+ padding.bottom = 0;
+ padding.left = 0;
+ padding.right = 0;
+
+ return padding;
+}
+
+Padding same_padding(const nnfw::misc::feature::Shape &ifm_shape,
+ const nnfw::misc::feature::Shape &ofm_shape, const Stride &stride, uint32_t kw,
+ uint32_t kh)
+{
+ Padding padding;
+
+ // ANEURALNETWORKS_PADDING_SAME (from NNAPI spec)
+ //
+ // SAME padding. Padding on both ends are the "same":
+ //
+ // padding_to_beginning = total_padding / 2
+ // padding_to_end = (total_padding + 1)/2.
+ //
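+  // For example, with ifm.H = 5, ofm.H = 3, stride.vertical = 2 and kh = 3:
+  //   needed input = (3 - 1) * 2 + 3 = 7, total padding = 7 - 5 = 2,
+  //   top = 2 / 2 = 1, bottom = (2 + 1) / 2 = 1.
+  //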
+ const int32_t vertical_needed_input = (ofm_shape.H - 1) * stride.vertical + kh;
+ const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
+
+ const int32_t horizontal_needed_input = (ofm_shape.W - 1) * stride.horizontal + kw;
+ const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
+
+ padding.top = vertical_total_padding / 2;
+ padding.bottom = (vertical_total_padding + 1) / 2;
+ padding.left = horizontal_total_padding / 2;
+ padding.right = (horizontal_total_padding + 1) / 2;
+
+ return padding;
+}
+
+::arm_compute::PadStrideInfo asPadStrideInfo(const Padding &padding, const Stride &stride)
+{
+ return ::arm_compute::PadStrideInfo{stride.horizontal,
+ stride.vertical,
+ padding.left,
+ padding.right,
+ padding.top,
+ padding.bottom,
+ ::arm_compute::DimensionRoundingType::FLOOR};
+}
+
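+// Translate an NNAPI fused-activation code into the equivalent ACL activation.
+// RELU maps to plain RELU; RELU1 and RELU6 map to LU_BOUNDED_RELU with bounds
+// [-1, 1] and [0, 6] respectively.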
+::arm_compute::ActivationLayerInfo asActInfo(FuseCode act)
+{
+ if (act == ANEURALNETWORKS_FUSED_NONE)
+ {
+ return ::arm_compute::ActivationLayerInfo();
+ }
+ else if (act == ANEURALNETWORKS_FUSED_RELU)
+ {
+ return ::arm_compute::ActivationLayerInfo(
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+ }
+ else if (act == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ return ::arm_compute::ActivationLayerInfo(
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f);
+ }
+ else if (act == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ return ::arm_compute::ActivationLayerInfo(
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f);
+ }
+ else
+ {
+ throw std::runtime_error("Not supported, yet");
+ }
+}
+
+struct IAllocationContext
+{
+ virtual ~IAllocationContext() = default;
+
+ virtual ::arm_compute::ITensor *at(const ::internal::tflite::operand::Index &ind) const = 0;
+};
+
+#include "internal/IExecutionBuilder.h"
+
+using Initializer = std::function<void(::arm_compute::ITensor &)>;
+using Stage = std::function<void(const IAllocationContext &, IExecutionBuilder &)>;
+
+using namespace std::placeholders;
+
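+// The init*Tensor helpers below copy NNAPI-ordered constant data into an ACL
+// tensor element by element, going through the Reader/View adapters so that the
+// two memory layouts never have to match byte-for-byte.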
+template <typename T>
+static void initFeatureTensor(::arm_compute::ITensor &tensor,
+ const nnfw::misc::feature::Shape &feature_shape,
+ const uint8_t *feature_base, const size_t feature_size)
+{
+ const ::internal::nnapi::feature::Reader<T> from{
+ feature_shape, reinterpret_cast<const T *>(feature_base), feature_size};
+ ::internal::arm_compute::feature::View<T> into{&tensor};
+
+ ::nnfw::misc::feature::iterate(feature_shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+}
+
+template <typename T>
+static void initVectorTensor(::arm_compute::ITensor &tensor, const uint8_t *vec_base,
+ const size_t vec_size)
+{
+ for (uint32_t n = 0; n < vec_size; ++n)
+ {
+ const ::arm_compute::Coordinates coordinate{n};
+
+ T *into = reinterpret_cast<T *>(tensor.ptr_to_element(coordinate));
+
+ const T *from = reinterpret_cast<const T *>(vec_base) + n;
+ const auto value = *from;
+
+ *into = value;
+ }
+}
+
+template <typename T>
+static void initTensor3D(::arm_compute::ITensor &tensor,
+ const nnfw::misc::tensor::Shape &tensor_shape, const uint8_t *tensor_base,
+ const size_t tensor_size)
+{
+ const ::internal::nnapi::tensor::Reader<T> from{
+ tensor_shape, reinterpret_cast<const T *>(tensor_base), tensor_size};
+ ::internal::arm_compute::tensor::View<T> into{&tensor};
+
+ ::nnfw::misc::tensor::iterate(tensor_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) {
+ ::nnfw::misc::tensor::Index index_ACL = ::nnfw::misc::tensor::copy_reverse(index_nnapi);
+ into.at(index_ACL) = from.at(index_nnapi);
+ };
+}
+
+template <typename T>
+static void initMatrixTensor(::arm_compute::ITensor &tensor,
+ const nnfw::misc::matrix::Shape &matrix_shape,
+ const uint8_t *matrix_base, const size_t matrix_size)
+{
+ const ::internal::nnapi::matrix::Reader<T> from{
+ matrix_shape, reinterpret_cast<const T *>(matrix_base), matrix_size};
+ ::internal::arm_compute::matrix::View<T> into{&tensor};
+
+ ::nnfw::misc::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) {
+ const auto value = from.at(row, col);
+ into.at(row, col) = value;
+ };
+}
+
+template <typename T>
+static void initReorderVectorTensor(::arm_compute::ITensor &tensor, const uint8_t *vec_base,
+ const size_t vec_size)
+{
+ for (uint32_t n = 0; n < vec_size; ++n)
+ {
+ const ::arm_compute::Coordinates coordinate{ToARMComputeAxis(vec_size, n).value()};
+
+ T *into = reinterpret_cast<T *>(tensor.ptr_to_element(coordinate));
+
+ const T *from = reinterpret_cast<const T *>(vec_base) + n;
+ const auto value = *from;
+
+ *into = value;
+ }
+}
+
+template <typename T>
+static void initKernelTensor(::arm_compute::ITensor &tensor,
+ const nnfw::misc::kernel::Shape &kernel_shape,
+ const uint8_t *kernel_base, const size_t kernel_size)
+{
+ const ::internal::nnapi::kernel::Reader<T> from{
+ kernel_shape, reinterpret_cast<const T *>(kernel_base), kernel_size};
+ ::internal::arm_compute::kernel::View<T> into{&tensor};
+
+ ::nnfw::misc::kernel::iterate(kernel_shape)
+ << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(nth, ch, row, col);
+ into.at(nth, ch, row, col) = value;
+ };
+}
+
+/**
+ * @brief Structure to provide interface methods of compilation plan builder
+ */
+struct IPlanBuilder
+{
+ /**
+ * @brief Destruct IPlanBuilder object using default destructor
+ */
+ virtual ~IPlanBuilder() = default;
+
+ /**
+ * @brief Add TensorInfo with Shape Constraints
+ * @param [in] ind Index of operand
+ * @param [in] info TensorInfo value to set to index of operand
+ * @return N/A
+ */
+ virtual void addShapeConstr(const ::internal::tflite::operand::Index &ind,
+ const ::arm_compute::TensorInfo &info) = 0;
+ /**
+ * @brief Add Subsumption constraints
+ * @param [in] ind Index of operand
+ * @param [in] base Index of base operand of Subsumption
+ * @param [in] offset Offset of Subsumption
+ * @param [in] shape Shape of Subsumption
+ * @param [in] extend_parent extend_parent value of Subsumption
+ * @return N/A
+ */
+ virtual void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind,
+ const ::internal::tflite::operand::Index &base,
+ const ::arm_compute::Coordinates &offset,
+ const ::arm_compute::TensorShape &shape,
+ bool extend_parent = false) = 0;
+ /**
+ * @brief Add Initializer lambda with ITensor param
+ * @param [in] ind Index of operand
+ * @param [in] initializer Initializer to add
+ * @return N/A
+ */
+ virtual void addInitializer(const ::internal::tflite::operand::Index &ind,
+ const Initializer &initializer) = 0;
+ /**
+ * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params
+ * @param [in] stage Stage to add
+ * @return N/A
+ */
+ virtual void addStage(const Stage &stage) = 0;
+};
+
+//
+// ActivationBuilder
+//
+class ActivationBuilder
+{
+public:
+ ActivationBuilder(IExecutionBuilder &builder) : _builder(builder)
+ {
+ // DO NOTHING
+ }
+
+private:
+ void appendReLU(::arm_compute::ITensor *tensor);
+ void appendReLU6(::arm_compute::ITensor *tensor);
+ void appendReLU1(::arm_compute::ITensor *tensor);
+
+public:
+ void append(FuseCode code, ::arm_compute::ITensor *tensor);
+
+private:
+ IExecutionBuilder &_builder;
+};
+
+void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
+{
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
+
+ _builder.append("ReLU", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, nullptr, act_info);
+
+ _builder.append("ReLU", std::move(fn));
+ }
+}
+
+void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
+{
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
+
+ _builder.append("ReLU1", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, nullptr, act_info);
+
+ _builder.append("ReLU1", std::move(fn));
+ }
+}
+
+void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
+{
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), nullptr, act_info);
+
+ _builder.append("ReLU6", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, nullptr, act_info);
+
+ _builder.append("ReLU6", std::move(fn));
+ }
+}
+
+void ActivationBuilder::append(FuseCode code, ::arm_compute::ITensor *ifm_alloc)
+{
+ switch (code)
+ {
+ case ANEURALNETWORKS_FUSED_NONE:
+ {
+ // DO NOTHING
+ break;
+ }
+ case ANEURALNETWORKS_FUSED_RELU:
+ {
+ appendReLU(ifm_alloc);
+ break;
+ }
+ case ANEURALNETWORKS_FUSED_RELU1:
+ {
+ appendReLU1(ifm_alloc);
+ break;
+ }
+ case ANEURALNETWORKS_FUSED_RELU6:
+ {
+ appendReLU6(ifm_alloc);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+}
+
+class Planner : public ::internal::tflite::op::NodeVisitor
+{
+public:
+ Planner(const ::internal::tflite::operand::Set &ctx, IPlanBuilder &builder)
+ : _ctx{ctx}, _builder{builder}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void visit(const ::internal::tflite::op::Add::Node &node) override;
+ void visit(const ::internal::tflite::op::Sub::Node &node) override;
+ void visit(const ::internal::tflite::op::Mul::Node &node) override;
+ void visit(const ::internal::tflite::op::Div::Node &node) override;
+ void visit(const ::internal::tflite::op::Conv2D::Implicit::Node &node) override;
+ void visit(const ::internal::tflite::op::Conv2D::Explicit::Node &node) override;
+ void visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Node &node) override;
+ void visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Node &node) override;
+ void visit(const ::internal::tflite::op::Dequantize::Node &node) override;
+ void visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &node) override;
+ void visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &node) override;
+ void visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &node) override;
+ void visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &node) override;
+ void visit(const ::internal::tflite::op::Concat::Node &node) override;
+ void visit(const ::internal::tflite::op::FullyConnected::Node &node) override;
+ void visit(const ::internal::tflite::op::ResizeBilinear::Node &node) override;
+ void visit(const ::internal::tflite::op::Reshape::Node &node) override;
+ void visit(const ::internal::tflite::op::Squeeze::Node &node) override;
+ void visit(const ::internal::tflite::op::Softmax::Node &node) override;
+ void visit(const ::internal::tflite::op::StridedSlice::Node &node) override;
+ void visit(const ::internal::tflite::op::ReduceMax::Node &node) override;
+ void visit(const ::internal::tflite::op::ReduceMin::Node &node) override;
+ void visit(const ::internal::tflite::op::Cast::Node &node) override;
+ void visit(const ::internal::tflite::op::TopKV2::Node &node) override;
+ void visit(const ::internal::tflite::op::Gather::Node &node) override;
+ void visit(const ::internal::tflite::op::PReLU::Node &node) override;
+ void visit(const ::internal::tflite::op::ReLU::Node &node) override;
+ void visit(const ::internal::tflite::op::ReLU1::Node &node) override;
+ void visit(const ::internal::tflite::op::ReLU6::Node &node) override;
+ void visit(const ::internal::tflite::op::Tanh::Node &node) override;
+ void visit(const ::internal::tflite::op::Logistic::Node &node) override;
+ void visit(const ::internal::tflite::op::Mean::Node &node) override;
+ void visit(const ::internal::tflite::op::RNN::Node &node) override;
+ void visit(const ::internal::tflite::op::Transpose::Node &node) override;
+ void visit(const ::internal::tflite::op::LSTM::Node &node) override;
+ void visit(const ::internal::tflite::op::Floor::Node &node) override;
+ void visit(const ::internal::tflite::op::Split::Node &node) override;
+ void visit(const ::internal::tflite::op::ArgMax::Node &node) override;
+ void visit(const ::internal::tflite::op::RSQRT::Node &node) override;
+ void visit(const ::internal::tflite::op::SQRT::Node &node) override;
+ void visit(const ::internal::tflite::op::Pad::Node &node) override;
+ void visit(const ::internal::tflite::op::SpaceToDepth::Node &node) override;
+ void visit(const ::internal::tflite::op::SpaceToBatchND::Node &node) override;
+ void visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node) override;
+ void visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node) override;
+ void visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node) override;
+ void visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) override;
+ void visit(const ::internal::tflite::op::HashtableLookup::Node &node) override;
+ void visit(const ::internal::tflite::op::L2Normalization::Node &node) override;
+ void visit(const ::internal::tflite::op::SquaredDifference::Node &node) override;
+ void visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node) override;
+ void visit(const ::internal::tflite::op::DepthToSpace::Node &node) override;
+ void visit(const ::internal::tflite::op::Unpack::Node &node) override;
+ void visit(const ::internal::tflite::op::Neg::Node &node) override;
+ void visit(const ::internal::tflite::op::Exp::Node &node) override;
+ void visit(const ::internal::tflite::op::ReduceSum::Node &node) override;
+ void visit(const ::internal::tflite::op::Equal::Node &node) override;
+ void visit(const ::internal::tflite::op::TransposeConv::Node &node) override;
+ void visit(const ::internal::tflite::op::Pack::Node &node) override;
+ void visit(const ::internal::tflite::op::Abs::Node &node) override;
+ void visit(const ::internal::tflite::op::NotEqual::Node &node) override;
+ void visit(const ::internal::tflite::op::LogicalAnd::Node &node) override;
+ void visit(const ::internal::tflite::op::LogicalNot::Node &node) override;
+ void visit(const ::internal::tflite::op::LogicalOr::Node &node) override;
+
+private:
+ const ::internal::tflite::operand::Set &_ctx;
+ IPlanBuilder &_builder;
+};
+
+void Planner::visit(const ::internal::tflite::op::Add::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index};
+ const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
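+  // When the two input shapes differ, extend both operands to the larger rank
+  // (e.g. a rank-1 operand added to a rank-4 feature map) so that ACL can
+  // broadcast them against each other.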
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(lhs_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(rhs_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(
+ lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(),
+ _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint()));
+ _builder.addShapeConstr(
+ rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(),
+ _ctx.at(rhs_index).scale(), _ctx.at(rhs_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int lhs_index;
+ int rhs_index;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.lhs_index = lhs_index.asInt();
+ param.rhs_index = rhs_index.asInt();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ const auto lhs_shape = _ctx.at(lhs_index).shape();
+ const auto rhs_shape = _ctx.at(rhs_index).shape();
+ auto stage = [param, lhs_shape, rhs_shape](const IAllocationContext &ctx,
+ IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
+ auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ {
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>();
+
+ // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
+ l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
+ ::arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
+ else // NEON
+ {
+ auto l = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>();
+
+ // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
+ l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
+ }
+
+ builder.append("Add", std::move(fn));
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Sub::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index};
+ const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(lhs_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(rhs_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(
+ lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(),
+ _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint()));
+ _builder.addShapeConstr(
+ rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(),
+ _ctx.at(rhs_index).scale(), _ctx.at(rhs_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int lhs_index;
+ int rhs_index;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.lhs_index = lhs_index.asInt();
+ param.rhs_index = rhs_index.asInt();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
+ auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtraction>();
+
+ // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
+ fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc),
+ ::arm_compute::ConvertPolicy::SATURATE);
+
+ builder.append("Sub", std::move(fn));
+ }
+ else // NEON
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>();
+
+ // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
+ fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
+
+ builder.append("Sub", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Mul::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index};
+ const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ if (_ctx.at(ofm_index).scale() > 0)
+ {
+ assert(_ctx.at(ofm_index).scale() > _ctx.at(lhs_index).scale() * _ctx.at(rhs_index).scale());
+ }
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(lhs_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(rhs_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(
+ lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(),
+ _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint()));
+ _builder.addShapeConstr(
+ rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(),
+ _ctx.at(rhs_index).scale(), _ctx.at(rhs_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int lhs_index;
+ int rhs_index;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.lhs_index = lhs_index.asInt();
+ param.rhs_index = rhs_index.asInt();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto lhs_input_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
+ auto rhs_input_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>();
+
+ fn->configure(CAST_CL(lhs_input_alloc), CAST_CL(rhs_input_alloc), CAST_CL(output_alloc),
+ 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+
+ builder.append("Mul", std::move(fn));
+ }
+ else // NEON
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>();
+
+ fn->configure(lhs_input_alloc, rhs_input_alloc, output_alloc,
+ 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+
+ builder.append("Mul", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, output_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Div::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index};
+ const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index};
+
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(lhs_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(rhs_index).shape())
+ .extendRank(broadcast_rank);
+ }
+
+ _builder.addShapeConstr(
+ lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(),
+ _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint()));
+ _builder.addShapeConstr(
+ rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(),
+ _ctx.at(rhs_index).scale(), _ctx.at(rhs_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int lhs_index;
+ int rhs_index;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.lhs_index = lhs_index.asInt();
+ param.rhs_index = rhs_index.asInt();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
+ auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticDivision>();
+
+ fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
+
+ builder.append("Div", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Conv2D::Implicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+ const auto ker_shape = _ctx.at(ker_index).shape().asKernel();
+ const auto bias_size = _ctx.at(bias_index).shape().asVector();
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ Stride stride;
+
+ stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
+ stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(),
+ _ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
+
+ // Set initializer for kernel
+ {
+ auto ker_base = _ctx.at(ker_index).data().base();
+ auto ker_size = _ctx.at(ker_index).data().size();
+ auto ker_type = _ctx.at(ker_index).type();
+
+ switch (ker_type)
+ {
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initKernelTensor<float>, _1, ker_shape, ker_base, ker_size);
+ _builder.addInitializer(ker_index, initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer = std::bind(initKernelTensor<uint8_t>, _1, ker_shape, ker_base, ker_size);
+ _builder.addInitializer(ker_index, initializer);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported");
+ }
+ }
+ }
+
+ // Set initializer for bias
+ {
+ auto bias_base = _ctx.at(bias_index).data().base();
+ auto bias_type = _ctx.at(bias_index).type();
+
+ switch (bias_type)
+ {
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initVectorTensor<float>, _1, bias_base, bias_size);
+ _builder.addInitializer(bias_index, initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = std::bind(initVectorTensor<int32_t>, _1, bias_base, bias_size);
+ _builder.addInitializer(bias_index, initializer);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported");
+ }
+ }
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int ker_index;
+ int bias_index;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.ker_index = ker_index.asInt();
+ param.bias_index = bias_index.asInt();
+
+ param.stride = stride;
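+  // SAME padding pads just enough to keep the output spatial size at
+  // ceil(input size / stride); VALID adds no padding at all.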
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H)
+ : valid_padding();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+ auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+ const auto conv_info = asPadStrideInfo(param.padding, param.stride);
+ const auto fused_act = asActInfo(param.activation);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLConvolutionLayer> fn{new ::arm_compute::CLConvolutionLayer};
+
+      // To pass the fused_act parameter, WeightsInfo() and Size2D(1U, 1U) (dilation)
+      // are passed explicitly with their default values.
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
+ conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U),
+ fused_act);
+
+ builder.append("Conv2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEConvolutionLayer> fn{new ::arm_compute::NEConvolutionLayer};
+
+      // To pass the fused_act parameter, WeightsInfo() and Size2D(1U, 1U) (dilation)
+      // are passed explicitly with their default values.
+ fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), fused_act);
+
+ builder.append("Conv2D", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Conv2D::Explicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+ const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+ const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+ const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+ const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+ const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+ const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+ Stride stride;
+
+ stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
+ stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(),
+ _ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
+
+ // Set initializer for kernel
+ // Workaround for https://github.sec.samsung.net/STAR/nnfw/issues/2319
+ if (_ctx.at(ker_index).hasData())
+ {
+ const auto ker_shape = _ctx.at(ker_index).shape().asKernel();
+ auto ker_base = _ctx.at(ker_index).data().base();
+ auto ker_size = _ctx.at(ker_index).data().size();
+ auto ker_type = _ctx.at(ker_index).type();
+
+ switch (ker_type)
+ {
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initKernelTensor<float>, _1, ker_shape, ker_base, ker_size);
+ _builder.addInitializer(ker_index, initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer = std::bind(initKernelTensor<uint8_t>, _1, ker_shape, ker_base, ker_size);
+ _builder.addInitializer(ker_index, initializer);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported");
+ }
+ }
+ }
+
+ // Set initializer for bias
+ // See above comment.
+ if (_ctx.at(bias_index).hasData())
+ {
+ const auto bias_size = _ctx.at(bias_index).shape().asVector();
+ auto bias_base = _ctx.at(bias_index).data().base();
+ auto bias_type = _ctx.at(bias_index).type();
+
+ switch (bias_type)
+ {
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initVectorTensor<float>, _1, bias_base, bias_size);
+ _builder.addInitializer(bias_index, initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = std::bind(initVectorTensor<int32_t>, _1, bias_base, bias_size);
+ _builder.addInitializer(bias_index, initializer);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("Not supported");
+ }
+ }
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int ker_index;
+ int bias_index;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.ker_index = ker_index.asInt();
+ param.bias_index = bias_index.asInt();
+
+ param.stride = stride;
+
+ param.padding.left = padding_left;
+ param.padding.right = padding_right;
+ param.padding.top = padding_top;
+ param.padding.bottom = padding_bottom;
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+ auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+ const auto conv_info = asPadStrideInfo(param.padding, param.stride);
+ const auto fused_act = asActInfo(param.activation);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLConvolutionLayer> fn{new ::arm_compute::CLConvolutionLayer};
+
+      // To pass the fused_act parameter, WeightsInfo() and Size2D(1U, 1U) (dilation)
+      // are passed explicitly with their default values.
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
+ conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U),
+ fused_act);
+
+ builder.append("Conv2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEConvolutionLayer> fn{new ::arm_compute::NEConvolutionLayer};
+
+      // To pass the fused_act parameter, WeightsInfo() and Size2D(1U, 1U) (dilation)
+      // are passed explicitly with their default values.
+ fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), fused_act);
+
+ builder.append("Conv2D", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index multiplier_index{node.param().multiplier_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+ const auto bias_size = _ctx.at(bias_index).shape().asVector();
+
+ auto multiplier = _ctx.at(multiplier_index).asScalar<int>();
+
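+  // Depthwise convolution applies `multiplier` filters to each input channel, so the
+  // kernel (and bias) channel count must equal IFM_C * multiplier: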
+ assert(ker_shape.C == bias_size);
+ assert(ker_shape.C == ifm_shape.C * multiplier);
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ Stride stride;
+
+ stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
+ stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ // NOTE DepthwiseConv2D kernel is of shape [1, KER_W, KER_H, IFM_C * MULTIPLIER]
+ _builder.addShapeConstr(
+ ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(),
+ _ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int ker_index;
+ int bias_index;
+
+ Padding padding;
+ Stride stride;
+
+    int multiplier;
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.ker_index = ker_index.asInt();
+ param.bias_index = bias_index.asInt();
+
+ param.stride = stride;
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H)
+ : valid_padding();
+
+  param.multiplier = multiplier;
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ VERBOSE(DepthwiseConv2D) << "OFM_C: " << ofm_shape.C << std::endl;
+ VERBOSE(DepthwiseConv2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(DepthwiseConv2D) << "OFM_W: " << ofm_shape.W << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "IFM_C: " << ifm_shape.C << std::endl;
+ VERBOSE(DepthwiseConv2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(DepthwiseConv2D) << "IFM_W: " << ifm_shape.W << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "KER_C: " << ker_shape.C << std::endl;
+ VERBOSE(DepthwiseConv2D) << "KER_H: " << ker_shape.H << std::endl;
+ VERBOSE(DepthwiseConv2D) << "KER_W: " << ker_shape.W << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "STRIDE_H: " << param.stride.vertical << std::endl;
+ VERBOSE(DepthwiseConv2D) << "STRIDE_W: " << param.stride.horizontal << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "ACTIVATION: " << param.activation << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "PAD(T): " << param.padding.top << std::endl;
+ VERBOSE(DepthwiseConv2D) << "PAD(B): " << param.padding.bottom << std::endl;
+ VERBOSE(DepthwiseConv2D) << "PAD(L): " << param.padding.left << std::endl;
+ VERBOSE(DepthwiseConv2D) << "PAD(R): " << param.padding.right << std::endl;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+ auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+ const auto conv_info = asPadStrideInfo(param.padding, param.stride);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
+                  conv_info, param.multiplier);
+
+ builder.append("DepthwiseConv2D", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+
+      fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multiplier);
+
+ builder.append("DepthwiseConv2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+ const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+ const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+ const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+ const ::internal::tflite::operand::Index multiplier_index{node.param().multiplier_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+ const auto bias_size = _ctx.at(bias_index).shape().asVector();
+
+ auto multiplier = _ctx.at(multiplier_index).asScalar<int>();
+
+ assert(ker_shape.C == bias_size);
+ assert(ker_shape.C == ifm_shape.C * multiplier);
+
+ const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+ const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+ const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+ const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+ Stride stride;
+
+ stride.vertical = _ctx.at(vstride_index).asScalar<int32_t>();
+ stride.horizontal = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ // NOTE DepthwiseConv2D kernel is of shape [1, KER_W, KER_H, IFM_C * MULTIPLIER]
+ _builder.addShapeConstr(
+ ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(),
+ _ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int ker_index;
+ int bias_index;
+
+ Padding padding;
+ Stride stride;
+
+    int multiplier;
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.ker_index = ker_index.asInt();
+ param.bias_index = bias_index.asInt();
+
+ param.stride = stride;
+
+ param.padding.left = padding_left;
+ param.padding.right = padding_right;
+ param.padding.top = padding_top;
+ param.padding.bottom = padding_bottom;
+
+  param.multiplier = multiplier;
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ VERBOSE(DepthwiseConv2D) << "OFM_C: " << ofm_shape.C << std::endl;
+ VERBOSE(DepthwiseConv2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(DepthwiseConv2D) << "OFM_W: " << ofm_shape.W << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "IFM_C: " << ifm_shape.C << std::endl;
+ VERBOSE(DepthwiseConv2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(DepthwiseConv2D) << "IFM_W: " << ifm_shape.W << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "KER_C: " << ker_shape.C << std::endl;
+ VERBOSE(DepthwiseConv2D) << "KER_H: " << ker_shape.H << std::endl;
+ VERBOSE(DepthwiseConv2D) << "KER_W: " << ker_shape.W << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "STRIDE_H: " << param.stride.vertical << std::endl;
+ VERBOSE(DepthwiseConv2D) << "STRIDE_W: " << param.stride.horizontal << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "ACTIVATION: " << param.activation << std::endl;
+
+ VERBOSE(DepthwiseConv2D) << "PAD(T): " << param.padding.top << std::endl;
+ VERBOSE(DepthwiseConv2D) << "PAD(B): " << param.padding.bottom << std::endl;
+ VERBOSE(DepthwiseConv2D) << "PAD(L): " << param.padding.left << std::endl;
+ VERBOSE(DepthwiseConv2D) << "PAD(R): " << param.padding.right << std::endl;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+ auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
+ const auto conv_info = asPadStrideInfo(param.padding, param.stride);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc),
+                  conv_info, param.multiplier);
+
+ builder.append("DepthwiseConv2D", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+
+      fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multiplier);
+
+ builder.append("DepthwiseConv2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
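+  // Dequantize applies the usual affine mapping: real = scale * (quantized - zero_point).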
+ assert(_ctx.at(input_index).shape().rank() >= 0 && _ctx.at(input_index).shape().rank() <= 4);
+ assert(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
+ assert(_ctx.at(input_index).type() == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM);
+ assert(_ctx.at(output_index).type() == ANEURALNETWORKS_TENSOR_FLOAT32);
+
+ // Set Shape Constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ {
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
+
+ l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
+ fn = std::move(l);
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ }
+
+ builder.append("Dequantize", std::move(fn));
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+ const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+
+ const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+ const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ uint32_t kw;
+ uint32_t kh;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.kh = kh;
+ param.kw = kw;
+
+ param.stride.vertical = vstride;
+ param.stride.horizontal = hstride;
+
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+ : valid_padding();
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
+ VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
+ VERBOSE(MaxPool2D) << "STRIDE_H: " << vstride << std::endl;
+ VERBOSE(MaxPool2D) << "STRIDE_W: " << hstride << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(T): " << param.padding.top << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(B): " << param.padding.bottom << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(L): " << param.padding.left << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(R): " << param.padding.right << std::endl;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
+ ::arm_compute::Size2D{param.kw, param.kh},
+ asPadStrideInfo(param.padding, param.stride)};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), info);
+
+ builder.append("MaxPool2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEPoolingLayer> fn{new ::arm_compute::NEPoolingLayer};
+
+ fn->configure(ifm_alloc, ofm_alloc, info);
+
+ builder.append("MaxPool2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+ const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+ const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+ const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+ const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+
+ const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+ const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+ const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+ const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+ const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ uint32_t kw;
+ uint32_t kh;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.kh = kh;
+ param.kw = kw;
+
+ param.stride.vertical = vstride;
+ param.stride.horizontal = hstride;
+
+ param.padding.left = padding_left;
+ param.padding.right = padding_right;
+ param.padding.top = padding_top;
+ param.padding.bottom = padding_bottom;
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
+ VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
+ VERBOSE(MaxPool2D) << "STRIDE_H: " << vstride << std::endl;
+ VERBOSE(MaxPool2D) << "STRIDE_W: " << hstride << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(T): " << param.padding.top << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(B): " << param.padding.bottom << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(L): " << param.padding.left << std::endl;
+ VERBOSE(MaxPool2D) << "PAD(R): " << param.padding.right << std::endl;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
+ ::arm_compute::Size2D{param.kw, param.kh},
+ asPadStrideInfo(param.padding, param.stride)};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), info);
+
+ builder.append("MaxPool2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEPoolingLayer> fn{new ::arm_compute::NEPoolingLayer};
+
+ fn->configure(ifm_alloc, ofm_alloc, info);
+
+ builder.append("MaxPool2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+ const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+
+ const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+ const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ uint32_t kw;
+ uint32_t kh;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.kh = kh;
+ param.kw = kw;
+
+ param.stride.vertical = vstride;
+ param.stride.horizontal = hstride;
+
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+ : valid_padding();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
+ VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
+ VERBOSE(AvgPool2D) << "STRIDE_H: " << vstride << std::endl;
+ VERBOSE(AvgPool2D) << "STRIDE_W: " << hstride << std::endl;
+ VERBOSE(AvgPool2D) << "PAD: " << to_string(padding_type) << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(T): " << param.padding.top << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(B): " << param.padding.bottom << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(L): " << param.padding.left << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(R): " << param.padding.right << std::endl;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
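+    // exclude_padding = true: padded cells are not counted in the averaging divisor,
+    // matching NNAPI's AVERAGE_POOL_2D semantics.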
+ ::arm_compute::PoolingLayerInfo info{
+ ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{param.kw, param.kh},
+ asPadStrideInfo(param.padding, param.stride), true /* exclude_padding */};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), info);
+
+ builder.append("AvgPool2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEPoolingLayer> fn{new ::arm_compute::NEPoolingLayer};
+
+ fn->configure(ifm_alloc, ofm_alloc, info);
+
+ builder.append("AvgPool2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+ const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+ const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+ const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+ const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+
+ const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+ const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+ const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+ const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+ const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ uint32_t kw;
+ uint32_t kh;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.kh = kh;
+ param.kw = kw;
+
+ param.stride.vertical = vstride;
+ param.stride.horizontal = hstride;
+
+ param.padding.left = padding_left;
+ param.padding.right = padding_right;
+ param.padding.top = padding_top;
+ param.padding.bottom = padding_bottom;
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
+ VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
+ VERBOSE(AvgPool2D) << "STRIDE_H: " << vstride << std::endl;
+ VERBOSE(AvgPool2D) << "STRIDE_W: " << hstride << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(T): " << param.padding.top << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(B): " << param.padding.bottom << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(L): " << param.padding.left << std::endl;
+ VERBOSE(AvgPool2D) << "PAD(R): " << param.padding.right << std::endl;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ ::arm_compute::PoolingLayerInfo info{
+ ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{param.kw, param.kh},
+ asPadStrideInfo(param.padding, param.stride), true /* exclude_padding */};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), info);
+
+ builder.append("AvgPool2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEPoolingLayer> fn{new ::arm_compute::NEPoolingLayer};
+
+ fn->configure(ifm_alloc, ofm_alloc, info);
+
+ builder.append("AvgPool2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Concat::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+  // NOTE This implementation assumes that the inputs and the output are feature maps
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+ uint32_t input_rank = ofm_shape.rank();
+ int32_t axis = _ctx.at(axis_index).asScalar<int32_t>();
+
+ // Handle negative axis
+ if (axis < 0)
+ {
+ axis += input_rank;
+ }
+
+ // Set Shape Constraints and TensorInfo (for output)
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ // Set Shape Constraints and TensorInfo (for input)
+ const uint32_t coord_index = ToARMComputeAxis(input_rank, axis).value();
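+  // NOTE ToARMComputeAxis accounts for ARM Compute's reversed dimension ordering
+  //      relative to NNAPI when picking the concatenation coordinate.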
+ uint32_t depth = 0;
+
+ ::arm_compute::Coordinates coordinates;
+ coordinates.set_num_dimensions(input_rank);
+
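+  // Each input becomes a sub-tensor of the output at its running offset along the
+  // concatenation axis, so no copy is issued at execution time: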
+ for (const auto &index : node.param().ifm_indexes)
+ {
+ const ::internal::tflite::operand::Index ifm_index{index};
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+
+ coordinates[coord_index] = depth;
+
+ _builder.addSubsumptionConstr(ifm_index, ofm_index, coordinates,
+ asTensorShape(_ctx.at(ifm_index).shape()), true);
+
+ depth += ifm_shape.dim(axis);
+ }
+
+  // NOTE Concat has no actual operation!
+  // However, a dummy stage is added because the profiler assumes every operation
+  // makes a stage.
+ auto stage = [](const IAllocationContext &ctx, IExecutionBuilder &builder) {};
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node)
+{
+ VERBOSE(FullyConnected) << "Configure FULLY_CONNECTED operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index weight_index{node.param().weight_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ assert(_ctx.at(input_index).shape().rank() >= 2);
+ assert(_ctx.at(output_index).shape().rank() == 2);
+ assert(_ctx.at(weight_index).shape().rank() == 2);
+ assert(_ctx.at(bias_index).shape().rank() == 1);
+
+ const auto input_rank = _ctx.at(input_index).shape().rank();
+  // TODO Currently the case where the input's rank is 3 is not handled;
+  // support should be added in the future.
+ assert(input_rank != 3);
+
+ const auto output_size = _ctx.at(output_index).shape().dim(1);
+ assert(_ctx.at(bias_index).shape().dim(0) == output_size);
+ assert(_ctx.at(weight_index).shape().dim(0) == output_size);
+ const auto batch_size = _ctx.at(output_index).shape().dim(0);
+ const auto input_size = _ctx.at(weight_index).shape().dim(1);
+
+ // Check for reshaping input's shape into rank-2
+ bool needs_reshape = false;
+ internal::tflite::operand::Shape reshape(2);
+ if (input_rank == 4)
+ {
+ nnfw::misc::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature();
+ auto feature_size =
+ ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W;
+ assert(feature_size == batch_size * input_size);
+
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // for reshaping
+ needs_reshape = true;
+ reshape.dim(0) = batch_size; /* H */
+ reshape.dim(1) = input_size; /* W */
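+    // e.g. a hypothetical [1, 7, 7, 1024] input collapses to a [1, 50176]
+    // (batch_size x input_size) matrix.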
+ }
+ else if (input_rank == 2)
+ {
+ auto ifm_shape = _ctx.at(input_index).shape();
+ nnfw::misc::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix();
+ assert(ifm_shape.dim(0) == batch_size);
+ assert(ifm_shape.dim(1) == input_size);
+
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+ }
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(weight_index,
+ asTensorInfo(asTensorShape(_ctx.at(weight_index).shape()),
+ _ctx.at(weight_index).type(), _ctx.at(weight_index).scale(),
+ _ctx.at(weight_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+
+ int input_index;
+ int weight_index;
+ int bias_index;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.weight_index = weight_index.asInt();
+ param.bias_index = bias_index.asInt();
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param, needs_reshape, reshape](const IAllocationContext &ctx,
+ IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+ auto weight_alloc = ctx.at(::internal::tflite::operand::Index{param.weight_index});
+ auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+
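+    // CLFullyConnectedReshapingLayer reshapes the input to the given rank-2 shape
+    // first (only when needs_reshape is set) and then runs the fully connected layer.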
+ auto fn = nnfw::cpp14::make_unique<arm_compute::CLFullyConnectedReshapingLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(weight_alloc), CAST_CL(bias_alloc),
+ CAST_CL(output_alloc), needs_reshape, asTensorShape(reshape));
+
+ builder.append("FullyConnected", std::move(fn));
+
+ ActivationBuilder{builder}.append(param.activation, output_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index height_index{node.param().height_index};
+ const ::internal::tflite::operand::Index width_index{node.param().width_index};
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ // Set Shape Constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ int new_height;
+ int new_width;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.new_height = _ctx.at(height_index).asScalar<int32_t>();
+ param.new_width = _ctx.at(width_index).asScalar<int32_t>();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
+ ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
+
+ builder.append("ResizeBilinear", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Reshape::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ auto input_shape = asTensorShape(_ctx.at(input_index).shape(), false);
+ auto output_shape = asTensorShape(_ctx.at(output_index).shape(), false);
+
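+  // A reshape only reinterprets the layout, so the element counts must match: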
+ assert(input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3] ==
+ output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]);
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ _builder.addShapeConstr(output_index, asTensorInfo(output_shape, _ctx.at(output_index).type(),
+ _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index, asTensorInfo(input_shape, _ctx.at(input_index).type(),
+ _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+      // GenericReshapeLayer first applies an NCHW->NHWC permutation, then the reshape.
+ auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
+
+ builder.append("Reshape", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>();
+
+ fn->configure(input_alloc, output_alloc);
+
+ builder.append("Reshape", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node)
+{
+  // node.param().dims_index_optional is ignored since the output tensor already has
+  // the squeezed shape produced by the freezer and toco.
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set Shape Constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
+
+ builder.append("Squeeze", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReshapeLayer>();
+
+ fn->configure(input_alloc, output_alloc);
+
+ builder.append("Squeeze", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Softmax::Node &node)
+{
+ VERBOSE(Softmax) << "Configure SOFTMAX operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index scale_index{node.param().scale_index};
+
+ assert(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ assert(_ctx.at(scale_index).shape().rank() == 0);
+
+ // TODO Should move to the place where the operand is handled, if it is possible.
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ float scale;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.scale = _ctx.at(scale_index).asScalar<float>();
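+ // param.scale is forwarded to the configure() calls below as the softmax scaling (beta)
+ // coefficient.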
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.scale);
+
+ builder.append("Softmax", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>();
+
+ fn->configure(input_alloc, output_alloc, param.scale);
+
+ builder.append("Softmax", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node)
+{
+ VERBOSE(StridedSlice) << "Configure STRIDED_SLICE operation" << std::endl;
+
+ const ::internal::tflite::operand::Index outputData_index{node.param().outputData_index};
+
+ const ::internal::tflite::operand::Index inputData_index{node.param().inputData_index};
+ const ::internal::tflite::operand::Index startData_index{node.param().startData_index};
+ const ::internal::tflite::operand::Index endData_index{node.param().endData_index};
+ const ::internal::tflite::operand::Index stridesData_index{node.param().stridesData_index};
+ const ::internal::tflite::operand::Index beginMask_index{node.param().beginMask_index};
+ const ::internal::tflite::operand::Index endMask_index{node.param().endMask_index};
+ const ::internal::tflite::operand::Index shrinkAxisMask_index{node.param().shrinkAxisMask_index};
+
+ // Set Shape Constraints
+ _builder.addShapeConstr(outputData_index,
+ asTensorInfo(asTensorShape(_ctx.at(outputData_index).shape()),
+ _ctx.at(outputData_index).type(),
+ _ctx.at(outputData_index).scale(),
+ _ctx.at(outputData_index).zeroPoint()));
+ _builder.addShapeConstr(
+ inputData_index,
+ asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), _ctx.at(inputData_index).type(),
+ _ctx.at(inputData_index).scale(), _ctx.at(inputData_index).zeroPoint()));
+
+ assert(_ctx.at(startData_index).shape().rank() == 1);
+ assert(_ctx.at(endData_index).shape().rank() == 1);
+ assert(_ctx.at(stridesData_index).shape().rank() == 1);
+ _builder.addShapeConstr(
+ startData_index,
+ asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()), _ctx.at(startData_index).type(),
+ _ctx.at(startData_index).scale(), _ctx.at(startData_index).zeroPoint()));
+ _builder.addShapeConstr(endData_index, asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()),
+ _ctx.at(endData_index).type(),
+ _ctx.at(endData_index).scale(),
+ _ctx.at(endData_index).zeroPoint()));
+ _builder.addShapeConstr(
+ stridesData_index,
+ asTensorInfo(asTensorShape(_ctx.at(stridesData_index).shape()), _ctx.at(stridesData_index).type(),
+ _ctx.at(stridesData_index).scale(), _ctx.at(stridesData_index).zeroPoint()));
+
+ // Set initializers for the index data, reordered to match the axis order of inputData
+ uint32_t input_rank = _ctx.at(inputData_index).shape().rank();
+ std::vector<int32_t> starts;
+ std::vector<int32_t> ends;
+ std::vector<int32_t> strides;
+ starts.resize(input_rank, 0);
+ ends.resize(input_rank, 0);
+ strides.resize(input_rank, 0);
+ {
+ auto input_shape = _ctx.at(inputData_index).shape();
+ auto startData_base = _ctx.at(startData_index).data().base();
+ auto endData_base = _ctx.at(endData_index).data().base();
+ auto stridesData_base = _ctx.at(stridesData_index).data().base();
+ const auto startData_size = _ctx.at(startData_index).shape().asVector();
+ const auto endData_size = _ctx.at(endData_index).shape().asVector();
+ const auto stridesData_size = _ctx.at(stridesData_index).shape().asVector();
+
+ assert(_ctx.at(startData_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(_ctx.at(endData_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(_ctx.at(stridesData_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(startData_size == input_rank);
+ assert(endData_size == input_rank);
+ assert(stridesData_size == input_rank);
+
+ assert(startData_base != nullptr);
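+ // ToARMComputeAxis converts each TFLite axis to the reversed coordinate order that
+ // ARM Compute expects, so the start/end/stride values land at their permuted positions.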
+ for (uint32_t n = 0; n < input_rank; ++n)
+ {
+ auto axis = ToARMComputeAxis(input_rank, n).value();
+
+ int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
+ starts[axis] = start_value;
+
+ int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
+ ends[axis] = end_value;
+
+ int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
+ strides[axis] = strides_value;
+ }
+ }
+
+ struct Param
+ {
+ int32_t outputData_index;
+ int32_t inputData_index;
+
+ std::vector<int32_t> starts;
+ std::vector<int32_t> ends;
+ std::vector<int32_t> strides;
+
+ int32_t beginMask;
+ int32_t endMask;
+ int32_t shrinkAxisMask;
+ };
+
+ Param param;
+ param.outputData_index = outputData_index.asInt();
+ param.inputData_index = inputData_index.asInt();
+
+ param.starts = starts;
+ param.ends = ends;
+ param.strides = strides;
+
+ // Reorder the mask bits to match the axis order of inputData
+ param.beginMask = _ctx.at(beginMask_index).asReorderBits<int32_t>(input_rank);
+ param.endMask = _ctx.at(endMask_index).asReorderBits<int32_t>(input_rank);
+ param.shrinkAxisMask = _ctx.at(shrinkAxisMask_index).asReorderBits<int32_t>(input_rank);
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto outputData_alloc = ctx.at(::internal::tflite::operand::Index{param.outputData_index});
+ auto inputData_alloc = ctx.at(::internal::tflite::operand::Index{param.inputData_index});
+
+ ::arm_compute::Coordinates starts;
+ ::arm_compute::Coordinates ends;
+ ::arm_compute::BiStrides strides;
+ for (size_t i = 0; i < param.starts.size(); ++i)
+ {
+ starts.set(i, param.starts[i]);
+ ends.set(i, param.ends[i]);
+ strides.set(i, param.strides[i]);
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSlice>();
+
+ fn->configure(CAST_CL(inputData_alloc), CAST_CL(outputData_alloc), starts, ends, strides,
+ param.beginMask, param.endMask, param.shrinkAxisMask);
+
+ builder.append("StridedSlice", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node)
+{
+ VERBOSE(ReduceMin) << "Configure REDUCEMIN operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto axis_shape = _ctx.at(axis_index).shape();
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+ // NOTE For 4-D tensors, if the ranks of the input and output differ, this runtime only
+ // supports cases that reduce height and width, or that reduce depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set the output's shape so that each dimension keeps the
+ // position it has in the input. But the positions of the same dimensions in the input and
+ // output may be set differently. For example, an input shape {2,3,4,5} can be reduced to an
+ // output shape {3,5}. The original output shape should be {1,3,1,5}, but the real output
+ // shape may be {3,5}, and naively extending that to 4 dimensions yields {1,1,3,5} instead.
+ // Even if the output shape is changed to {1,3,1,5}, there is another problem: the shape of
+ // the output tensor seen by the next operation becomes {1,3,1,5} after this operation, even
+ // when the next operation does not expect it.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C, or
+ // (reducing H with C(ifm and ofm) == 1), or (reducing W with C(ifm and ofm) == 1)
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
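+ // Collect reduction axes into a set so duplicates collapse and the iteration order is fixed.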
+ std::set<uint32_t> axis;
+ {
+ const auto ifm_rank = ifm_shape.rank();
+ switch (axis_shape.rank())
+ {
+ case 0: // scalar
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ break;
+ }
+ case 1: // vector
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+ // If the axis data is not available as constant values but only as input data, we would
+ // need a way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported");
+ break;
+ }
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::set<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::MIN);
+
+ builder.append("ReduceMin", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node)
+{
+ VERBOSE(ReduceMax) << "Configure REDUCEMAX operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto axis_shape = _ctx.at(axis_index).shape();
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+ // NOTE For 4-D tensors, if the ranks of the input and output differ, this runtime only
+ // supports cases that reduce height and width, or that reduce depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set the output's shape so that each dimension keeps the
+ // position it has in the input. But the positions of the same dimensions in the input and
+ // output may be set differently. For example, an input shape {2,3,4,5} can be reduced to an
+ // output shape {3,5}. The original output shape should be {1,3,1,5}, but the real output
+ // shape may be {3,5}, and naively extending that to 4 dimensions yields {1,1,3,5} instead.
+ // Even if the output shape is changed to {1,3,1,5}, there is another problem: the shape of
+ // the output tensor seen by the next operation becomes {1,3,1,5} after this operation, even
+ // when the next operation does not expect it.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C, or
+ // (reducing H with C(ifm and ofm) == 1), or (reducing W with C(ifm and ofm) == 1)
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ std::set<uint32_t> axis;
+ {
+ const auto ifm_rank = ifm_shape.rank();
+ switch (axis_shape.rank())
+ {
+ case 0: // scalar
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ break;
+ }
+ case 1: // vector
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+ // If the axis data is not available as constant values but only as input data, we would
+ // need a way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported");
+ break;
+ }
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::set<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::MAX);
+
+ builder.append("ReduceMax", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Cast::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ assert(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int input_index;
+ int output_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ {
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>();
+
+ l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
+ fn = std::move(l);
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ }
+
+ builder.append("Cast", std::move(fn));
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node)
+{
+ const ::internal::tflite::operand::Index outputValues_index{node.param().outputValues_index};
+ const ::internal::tflite::operand::Index outputIndices_index{node.param().outputIndices_index};
+
+ const ::internal::tflite::operand::Index inputData_index{node.param().inputData_index};
+ const ::internal::tflite::operand::Index k_index{node.param().k_index};
+
+ // Currently, we only support rank-1 or rank-2 input.
+ assert(_ctx.at(inputData_index).shape().rank() == 1 ||
+ _ctx.at(inputData_index).shape().rank() == 2);
+
+ const int32_t k = _ctx.at(k_index).asScalar<int32_t>();
+
+ // Set shape constraints
+ _builder.addShapeConstr(outputValues_index,
+ asTensorInfo(asTensorShape(_ctx.at(outputValues_index).shape()),
+ _ctx.at(outputValues_index).type(),
+ _ctx.at(outputValues_index).scale(),
+ _ctx.at(outputValues_index).zeroPoint()));
+ _builder.addShapeConstr(outputIndices_index,
+ asTensorInfo(asTensorShape(_ctx.at(outputIndices_index).shape()),
+ _ctx.at(outputIndices_index).type(),
+ _ctx.at(outputIndices_index).scale(),
+ _ctx.at(outputIndices_index).zeroPoint()));
+ _builder.addShapeConstr(
+ inputData_index,
+ asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), _ctx.at(inputData_index).type(),
+ _ctx.at(inputData_index).scale(), _ctx.at(inputData_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int32_t outputValues_index;
+ int32_t outputIndices_index;
+
+ int32_t inputData_index;
+ int32_t k;
+ };
+
+ Param param;
+
+ param.outputValues_index = outputValues_index.asInt();
+ param.outputIndices_index = outputIndices_index.asInt();
+ param.inputData_index = inputData_index.asInt();
+ param.k = k;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.outputValues_index});
+ auto indices_alloc = ctx.at(::internal::tflite::operand::Index{param.outputIndices_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.inputData_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>();
+
+ fn->configure(CAST_CL(input_alloc), param.k, CAST_CL(values_alloc), CAST_CL(indices_alloc));
+
+ builder.append("TopKV2", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Gather::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index indices_index{node.param().indices_index};
+
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto indices_shape = _ctx.at(indices_index).shape();
+ const auto axis_shape = _ctx.at(axis_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ assert(ifm_shape.rank() <= 4);
+ assert(indices_shape.rank() <= 3);
+ assert(ofm_shape.rank() <= 4);
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0);
+
+ // Set Shape Constraints
+ _builder.addShapeConstr(ofm_index,
+ asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false),
+ _ctx.at(ofm_index).type(), _ctx.at(ofm_index).scale(),
+ _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(ifm_index,
+ asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false),
+ _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(),
+ _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ indices_index, asTensorInfo(asTensorShape(_ctx.at(indices_index).shape(), false),
+ _ctx.at(indices_index).type(), _ctx.at(indices_index).scale(),
+ _ctx.at(indices_index).zeroPoint()));
+
+ const int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ const int axis = ToARMComputeAxis(ifm_shape.rank(), axis_value).value();
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int indices_index;
+
+ int axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.indices_index = indices_index.asInt();
+
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto indices_alloc = ctx.at(::internal::tflite::operand::Index{param.indices_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ auto l = nnfw::cpp14::make_unique<GenericGather>();
+ l->configure(CAST_CL(ifm_alloc), CAST_CL(indices_alloc), CAST_CL(ofm_alloc), param.axis);
+ fn = std::move(l);
+ builder.append("Gather", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::PReLU::Node &node)
+{
+ VERBOSE(PReLU) << "Configure PReLU operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
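+ // If the shapes differ, extend both to a common rank so element-wise broadcasting can line
+ // the dimensions up.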
+ if (!(_ctx.at(ifm_index).shape() == _ctx.at(alpha_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(ifm_index).shape().rank(), _ctx.at(alpha_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(ifm_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(alpha_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ _builder.addShapeConstr(alpha_index,
+ asTensorInfo(asTensorShape(_ctx.at(alpha_index).shape()),
+ _ctx.at(alpha_index).type(), _ctx.at(alpha_index).scale(),
+ _ctx.at(alpha_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int alpha_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.alpha_index = alpha_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto alpha_alloc = ctx.at(::internal::tflite::operand::Index{param.alpha_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>();
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(alpha_alloc), CAST_CL(ofm_alloc));
+ builder.append("PReLU", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReLU::Node &node)
+{
+ VERBOSE(ReLU) << "Configure ReLU operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
+
+ builder.append("ReLU", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, act_info);
+
+ builder.append("ReLU", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node)
+{
+ VERBOSE(ReLU1) << "Configure ReLU1 operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
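+ // LU_BOUNDED_RELU computes min(a, max(b, x)); a = 1.0f and b = -1.0f clamp the input to
+ // [-1, 1], which is exactly ReLU1.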
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
+
+ builder.append("ReLU1", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, act_info);
+
+ builder.append("ReLU1", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node)
+{
+ VERBOSE(ReLU6) << "Configure ReLU6 operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
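+ // BOUNDED_RELU computes min(a, max(0, x)); a = 6.0f yields ReLU6.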
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
+
+ builder.append("ReLU6", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, act_info);
+
+ builder.append("ReLU6", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Tanh::Node &node)
+{
+ VERBOSE(Tanh) << "Configure Tanh operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
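+ // ACL's TANH activation computes a * tanh(b * x); a = b = 1.0f gives the plain tanh.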
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
+
+ builder.append("Tanh", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, act_info);
+
+ builder.append("Tanh", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Logistic::Node &node)
+{
+ VERBOSE(Logistic) << "Configure Logistic operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info);
+
+ builder.append("Logistic", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, act_info);
+
+ builder.append("Logistic", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+// Reduce Mean
+void Planner::visit(const ::internal::tflite::op::Mean::Node &node)
+{
+ VERBOSE(Mean) << "Configure Mean operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+ const ::internal::tflite::operand::Index keep_dims_index{node.param().keep_dims_index};
+ const int keep_dims = _ctx.at(keep_dims_index).asScalar<int>();
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ // NOTE For 4-D tensors, if the ranks of the input and output differ, this runtime only
+ // supports cases that reduce height and width, or that reduce depth.
+ // TODO We have to support all cases of dimensions up to 4.
+ // For correct permuting, we have to set the output's shape so that each dimension keeps the
+ // position it has in the input. But the positions of the same dimensions in the input and
+ // output may be set differently. For example, an input shape {2,3,4,5} can be reduced to an
+ // output shape {3,5}. The original output shape should be {1,3,1,5}, but the real output
+ // shape may be {3,5}, and naively extending that to 4 dimensions yields {1,1,3,5} instead.
+ // Even if the output shape is changed to {1,3,1,5}, there is another problem: the shape of
+ // the output tensor seen by the next operation becomes {1,3,1,5} after this operation, even
+ // when the next operation does not expect it.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C, or
+ // (reducing H with C(ifm and ofm) == 1), or (reducing W with C(ifm and ofm) == 1)
+ assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+ ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+ (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+ (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+ ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(axis_index,
+ asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()),
+ _ctx.at(axis_index).type(), _ctx.at(axis_index).scale(),
+ _ctx.at(axis_index).zeroPoint()));
+
+ std::set<uint32_t> axis;
+ {
+ const auto ifm_rank = ifm_shape.rank();
+ const auto axis_shape = _ctx.at(axis_index).shape();
+ switch (axis_shape.rank())
+ {
+ case 0: // scalar
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ break;
+ }
+ case 1: // vector
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+ // If the axis data is not available as constant values but only as input data, we would
+ // need a way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_rank;
+ }
+ axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value());
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported");
+ break;
+ }
+ }
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ bool keep_dims;
+ std::set<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.keep_dims = keep_dims != 0;
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ ::arm_compute::Coordinates reduction_axis;
+ size_t i = 0;
+ for (auto index : param.axis)
+ {
+ reduction_axis.set(i++, index);
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceMean>();
+
+ fn->configure(CAST_CL(ifm_alloc), reduction_axis, param.keep_dims, CAST_CL(ofm_alloc));
+
+ builder.append("Mean", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::RNN::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index hidden_state_out_index{
+ node.param().hidden_state_out_index};
+
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index weights_index{node.param().weights_index};
+ const ::internal::tflite::operand::Index recurrent_weights_index{
+ node.param().recurrent_weights_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+ const ::internal::tflite::operand::Index hidden_state_in_index{
+ node.param().hidden_state_in_index};
+ const ::internal::tflite::operand::Index fused_activation_index{
+ node.param().fused_activation_index};
+
+ assert(_ctx.at(output_index).shape().rank() == 2 &&
+ _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
+ _ctx.at(input_index).shape().rank() == 2 && _ctx.at(weights_index).shape().rank() == 2 &&
+ _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
+ _ctx.at(hidden_state_in_index).shape().rank() == 2);
+ assert(_ctx.at(bias_index).shape().rank() == 1);
+
+ const auto batch_size = _ctx.at(output_index).shape().dim(0);
+ assert(batch_size == _ctx.at(input_index).shape().dim(0) &&
+ batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
+ assert(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
+
+ const auto num_units = _ctx.at(output_index).shape().dim(1);
+ assert(num_units == _ctx.at(weights_index).shape().dim(0) &&
+ num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == _ctx.at(bias_index).shape().dim(0));
+ assert(num_units == _ctx.at(output_index).shape().dim(1) &&
+ num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(hidden_state_out_index,
+ asTensorInfo(asTensorShape(_ctx.at(hidden_state_out_index).shape()),
+ _ctx.at(hidden_state_out_index).type(),
+ _ctx.at(hidden_state_out_index).scale(),
+ _ctx.at(hidden_state_out_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+ _builder.addShapeConstr(weights_index, asTensorInfo(asTensorShape(_ctx.at(weights_index).shape()),
+ _ctx.at(weights_index).type(),
+ _ctx.at(weights_index).scale(),
+ _ctx.at(weights_index).zeroPoint()));
+ _builder.addShapeConstr(recurrent_weights_index,
+ asTensorInfo(asTensorShape(_ctx.at(recurrent_weights_index).shape()),
+ _ctx.at(recurrent_weights_index).type(),
+ _ctx.at(recurrent_weights_index).scale(),
+ _ctx.at(recurrent_weights_index).zeroPoint()));
+ _builder.addShapeConstr(bias_index,
+ asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()),
+ _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(),
+ _ctx.at(bias_index).zeroPoint()));
+ _builder.addShapeConstr(hidden_state_in_index,
+ asTensorInfo(asTensorShape(_ctx.at(hidden_state_in_index).shape()),
+ _ctx.at(hidden_state_in_index).type(),
+ _ctx.at(hidden_state_in_index).scale(),
+ _ctx.at(hidden_state_in_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int hidden_state_out_index;
+
+ int input_index;
+ int weights_index;
+ int recurrent_weights_index;
+ int bias_index;
+ int hidden_state_in_index;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.hidden_state_out_index = hidden_state_out_index.asInt();
+
+ param.input_index = input_index.asInt();
+ param.weights_index = weights_index.asInt();
+ param.recurrent_weights_index = recurrent_weights_index.asInt();
+ param.bias_index = bias_index.asInt();
+ param.hidden_state_in_index = hidden_state_in_index.asInt();
+ param.activation = static_cast<FuseCode>(_ctx.at(fused_activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto hidden_state_out_alloc =
+ ctx.at(::internal::tflite::operand::Index{param.hidden_state_out_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+ auto weights_alloc = ctx.at(::internal::tflite::operand::Index{param.weights_index});
+ auto recurrent_weights_alloc =
+ ctx.at(::internal::tflite::operand::Index{param.recurrent_weights_index});
+ auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index});
+ auto hidden_state_in_alloc =
+ ctx.at(::internal::tflite::operand::Index{param.hidden_state_in_index});
+ auto act_info = asActivationInfo(param.activation);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLCopy> copy_fn{new ::arm_compute::CLCopy};
+ copy_fn->configure(CAST_CL(hidden_state_in_alloc), CAST_CL(hidden_state_out_alloc));
+ builder.append("COPY", std::move(copy_fn));
+
+ std::unique_ptr<::arm_compute::CLRNNLayer> rnn_fn{new ::arm_compute::CLRNNLayer};
+
+ // hidden_state_in's data must be copied into hidden_state_out before the RNN layer runs;
+ // the CLCopy appended above performs that copy.
+ rnn_fn->configure(CAST_CL(input_alloc), CAST_CL(weights_alloc),
+ CAST_CL(recurrent_weights_alloc), CAST_CL(bias_alloc),
+ CAST_CL(hidden_state_out_alloc), CAST_CL(output_alloc), act_info);
+
+ builder.append("RNN", std::move(rnn_fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LSTM::Node &node)
+{
+ // TODO Implement LSTM op
+ throw std::runtime_error("Not supported, yet");
+}
+
+void Planner::visit(const ::internal::tflite::op::Transpose::Node &node)
+{
+ VERBOSE(Transpose) << "Configure Transpose operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index permu_index{node.param().permu_index};
+
+ assert(_ctx.at(ifm_index).shape().rank() == _ctx.at(ofm_index).shape().rank());
+ assert(_ctx.at(permu_index).hasData());
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ const int32_t *pv;
+ int rank;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.pv = reinterpret_cast<const int32_t *>(_ctx.at(permu_index).data().base());
+ param.rank = _ctx.at(ifm_index).shape().rank();
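+ // param.pv points into the model's constant operand data, which is assumed to stay alive
+ // until the stage below runs.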
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ const auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermute>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
+ getARMComputePermutationVector(param.rank, param.pv));
+
+ builder.append("Transpose", std::move(fn));
+ }
+ else
+ {
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Floor::Node &node)
+{
+ VERBOSE(Floor) << "Configure Floor operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().output_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().input_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
+
+ builder.append("Floor", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+
+ builder.append("Floor", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ArgMax::Node &node)
+{
+ VERBOSE(ArgMax) << "Configure ARGMAX operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto axis_shape = _ctx.at(axis_index).shape();
+
+ assert(_ctx.at(axis_index).hasData());
+ // The axis operand is always a rank-1 tensor.
+ assert(axis_shape.rank() == 1);
+ assert((ifm_shape.rank() - 1) == ofm_shape.rank());
+
+ _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false),
+ _ctx.at(ofm_index).type()));
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false),
+ _ctx.at(ifm_index).type()));
+
+ std::vector<uint32_t> l_axis;
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+ auto axis_base = _ctx.at(axis_index).data().base();
+ auto axis_type = _ctx.at(axis_index).type();
+ // TODO Should support axis size > 1.
+ assert(axis_size == 1);
+ // axis is a rank-1 tensor, i.e., always a vector.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += ifm_shape.rank();
+ }
+ l_axis.push_back(ToARMComputeAxis(ifm_shape.rank(), axis_value).value());
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::vector<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = l_axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgOperation>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ArgOperation::MAX);
+
+ builder.append("ArgMax", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::SQRT::Node &node)
+{
+ VERBOSE(SQRT) << "Configure SQRT operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info);
+
+ builder.append("SQRT", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(input_alloc, output_alloc, act_info);
+
+ builder.append("SQRT", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::RSQRT::Node &node)
+{
+ VERBOSE(RSQRT) << "Configure Rsqrt operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLRsqrtLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
+
+ builder.append("RSQRT", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Equal::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparison>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::ComparisonOperation::Equal);
+
+ builder.append("Equal", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node)
+{
+ VERBOSE(TransposeConv) << "Configure TransposeConv operation" << std::endl;
+
+ const ::internal::tflite::operand::Index op_shape_index{node.param().op_shape_index};
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index ker_index{node.param().ker_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+
+ // Only 4D tensors are supported
+ assert(_ctx.at(ofm_index).shape().rank() == 4);
+ assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
+ assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+ const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+
+ assert(_ctx.at(padding_index).hasData() == true);
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ assert(vstride > 0);
+ assert(hstride > 0);
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+ assert(ifm_shape.N == ofm_shape.N);
+ assert(ifm_shape.C == ker_shape.C);
+ assert(ker_shape.N == ofm_shape.C);
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(),
+ _ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int ker_index;
+ Padding padding;
+ Stride stride;
+ uint32_t invalid_horizontal;
+ uint32_t invalid_vertical;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.ker_index = ker_index.asInt();
+
+ param.stride.horizontal = hstride;
+ param.stride.vertical = vstride;
+
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ofm_shape, ifm_shape, param.stride, ker_shape.W, ker_shape.H)
+ : valid_padding();
+
+ param.invalid_horizontal =
+ (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? 0
+ : ofm_shape.W - (1 + (ifm_shape.W - 1) * hstride) - (ker_shape.W - 1);
+ param.invalid_vertical =
+ (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? 0
+ : ofm_shape.H - (1 + (ifm_shape.H - 1) * param.stride.vertical) - (ker_shape.H - 1);
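+
+  // Illustrative example (not part of the original computation): with VALID padding,
+  // ifm W = 2, hstride = 2, and ker W = 3, the deconvolution can generate
+  // 1 + (2 - 1) * 2 + (3 - 1) = 5 columns, so an ofm with W = 6 leaves
+  // invalid_horizontal = 6 - 5 = 1 column to crop.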
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+ auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index});
+
+ // Only rank 4 is supported
+ const int rank = 4;
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTransposeConvLayer>();
+
+ auto symmetric_tconv_info = asPadStrideInfo(param.padding, param.stride);
+
+      // TODO Support WeightInfo in some cases in order to improve performance
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), nullptr, CAST_CL(ofm_alloc),
+ symmetric_tconv_info, param.invalid_horizontal, param.invalid_vertical);
+ builder.append("TransposeConv", std::move(fn));
+ }
+ else
+ {
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index};
+ const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ if (!(_ctx.at(lhs_index).shape() == _ctx.at(rhs_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(lhs_index).shape().rank(), _ctx.at(rhs_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(lhs_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(rhs_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(
+ lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(),
+ _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint()));
+ _builder.addShapeConstr(
+ rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(),
+ _ctx.at(rhs_index).scale(), _ctx.at(rhs_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int lhs_index;
+ int rhs_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.lhs_index = lhs_index.asInt();
+ param.rhs_index = rhs_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
+ auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+
+ fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc));
+ builder.append("SquaredDifference", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+  };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Split::Node &node)
+{
+ VERBOSE(Split) << "Configure Split operation" << std::endl;
+
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ int32_t axis = _ctx.at(axis_index).asScalar<int32_t>();
+
+ // Handle negative axis
+ if (axis < 0)
+ {
+ axis += ifm_shape.rank();
+ }
+
+ const int32_t num_split = node.param().ofm_indexes.size();
+ const auto input_size = ifm_shape.dim(axis);
+ assert(input_size % num_split == 0);
+ const int32_t slice_size = input_size / num_split;
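+
+  // e.g. with input_size = 8 along the split axis and num_split = 4, slice_size = 2 and the
+  // outputs below become sub-tensor views at offsets 0, 2, 4 and 6 (no copy is performed).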
+
+ // Set Shape Constraints and TensorInfo (for input)
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
+ _ctx.at(ifm_index).type()));
+
+ // Set Shape Constraints and TensorInfo (for output)
+ const auto rank = ifm_shape.rank();
+ const uint32_t coord_index = ToARMComputeAxis(rank, axis).value();
+ uint32_t depth = 0;
+
+ ::arm_compute::Coordinates coordinates;
+ coordinates.set_num_dimensions(rank);
+
+ for (const auto &index : node.param().ofm_indexes)
+ {
+ const ::internal::tflite::operand::Index ofm_index{index};
+
+ coordinates[coord_index] = depth;
+
+ _builder.addSubsumptionConstr(ofm_index, ifm_index, coordinates,
+ asTensorShape(_ctx.at(ofm_index).shape()), true);
+ depth += slice_size;
+ }
+
+ // NOTE Split has no actual operation!
+}
+
+void Planner::visit(const ::internal::tflite::op::Pad::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index};
+
+ assert(_ctx.at(paddings_index).hasData() == true);
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(ifm_index,
+ asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false),
+ _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(),
+ _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(ofm_index,
+ asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false),
+ _ctx.at(ofm_index).type(), _ctx.at(ofm_index).scale(),
+ _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ paddings_index, asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape(), false),
+ _ctx.at(paddings_index).type(), _ctx.at(paddings_index).scale(),
+ _ctx.at(paddings_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ ::arm_compute::PixelValue pixel_value;
+ ::arm_compute::PaddingList padding_list;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ // initializer for padding
+ auto rank = _ctx.at(ifm_index).shape().rank();
+ auto pad_type = _ctx.at(paddings_index).type();
+
+ if (pad_type == ANEURALNETWORKS_TENSOR_INT32)
+ {
+ auto pad_base = _ctx.at(paddings_index).data().base();
+ auto pad_shape = _ctx.at(paddings_index).shape();
+
+ param.padding_list.resize(rank);
+ for (int32_t n = 0; n < rank; ++n)
+ {
+ const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * pad_shape.dim(1));
+ auto axis = ToARMComputeAxis(rank, n).value();
+
+ param.padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
+ }
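+
+    // NOTE (illustrative): ToARMComputeAxis reverses the axis order (n -> rank - n - 1),
+    // so for a rank-4 NHWC tensor the paddings row for n = 1 (height) lands at ACL axis 2.
+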
+ auto data_type = asDataType(_ctx.at(ifm_index).type());
+ auto quant_info =
+ asQuantizationInfo(_ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint());
+ param.pixel_value = ::arm_compute::PixelValue{0, data_type, quant_info};
+ }
+ else
+ {
+ throw std::runtime_error("Only Int32 datatype is supported for Pad values");
+ }
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ {
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.padding_list,
+ param.pixel_value);
+
+ builder.append("PAD", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+ const auto input_batch = _ctx.at(input_index).shape().dim(0);
+ const auto output_batch = _ctx.at(output_index).shape().dim(0);
+ const auto input_depth = _ctx.at(input_index).shape().dim(3);
+ const auto output_depth = _ctx.at(output_index).shape().dim(3);
+ const auto block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+ const auto input_height = _ctx.at(input_index).shape().dim(1);
+ const auto input_width = _ctx.at(input_index).shape().dim(2);
+
+ // All assertions as per NNAPI specification.
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+ assert((block_size >= 1) && (input_height % block_size == 0) && (input_width % block_size == 0));
+ assert(input_batch == output_batch);
+ assert(input_depth * block_size * block_size == output_depth);
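+
+  // e.g. a 1x4x4x1 (NHWC) input with block_size = 2 yields a 1x2x2x4 output: each 2x2
+  // spatial block is folded into depth (1 * 2 * 2 == 4), matching the assertions above.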
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ int32_t block_size;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size = block_size;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ {
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
+
+ builder.append("SpaceToDepth", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+ const ::internal::tflite::operand::Index padding_size_index{node.param().padding_size_index};
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &padding_size_shape = _ctx.at(padding_size_index).shape();
+ auto block_size_base = reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+ auto padding_size_base =
+ reinterpret_cast<const int32_t *>(_ctx.at(padding_size_index).data().base());
+
+ { // New block for assertions
+ const auto &block_size_shape = _ctx.at(block_size_index).shape();
+
+    // Currently, only a 4D NHWC input/output op_context is supported.
+    // The 4D array needs to have exactly 2 spatial dimensions.
+    // TODO: Support arbitrary dimensions in SpaceToBatchND.
+ assert(input_shape.rank() == 4);
+ assert(output_shape.rank() == 4);
+ assert(block_size_shape.rank() == 1);
+ assert(padding_size_shape.rank() == 2);
+
+ assert(output_shape.dim(3) == input_shape.dim(3));
+ assert(block_size_shape.dim(0) == 2);
+ assert(padding_size_shape.dim(0) == 2);
+ assert(padding_size_shape.dim(1) == 2);
+
+ assert(_ctx.at(block_size_index).hasData() && _ctx.at(padding_size_index).hasData());
+ assert(_ctx.at(block_size_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(_ctx.at(padding_size_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+
+ assert(block_size_base[0] > 0 && block_size_base[1] > 0);
+ assert(output_shape.dim(0) == input_shape.dim(0) * block_size_base[0] * block_size_base[1]);
+ assert(output_shape.dim(1) ==
+ (input_shape.dim(1) + padding_size_base[0] + padding_size_base[1]) / block_size_base[0]);
+ assert(output_shape.dim(2) ==
+ (input_shape.dim(2) + padding_size_base[2] + padding_size_base[3]) / block_size_base[1]);
+ }
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ _builder.addShapeConstr(block_size_index,
+ asTensorInfo(asTensorShape(_ctx.at(block_size_index).shape()),
+ _ctx.at(block_size_index).type(),
+ _ctx.at(block_size_index).scale(),
+ _ctx.at(block_size_index).zeroPoint()));
+
+ _builder.addShapeConstr(padding_size_index,
+ asTensorInfo(asTensorShape(_ctx.at(padding_size_index).shape()),
+ _ctx.at(padding_size_index).type(),
+ _ctx.at(padding_size_index).scale(),
+ _ctx.at(padding_size_index).zeroPoint()));
+
+ { // Append block_size initializer
+ auto initializer = [block_size_base](::arm_compute::ITensor &tensor) {
+ const auto block_size_y = block_size_base[0];
+ const auto block_size_x = block_size_base[1];
+
+ auto into = reinterpret_cast<int32_t *>(tensor.ptr_to_element({0}));
+ into[0] = block_size_x;
+ into[1] = block_size_y;
+ };
+ _builder.addInitializer(block_size_index, initializer);
+ }
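+
+  // NOTE (illustrative): block_size arrives in NHWC order {block_height, block_width}, while
+  // the ACL tensor is written in (x, y) order, hence the swapped write above.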
+
+ { // Append padding_size initializer
+ auto initializer = [padding_size_base, padding_size_shape](::arm_compute::ITensor &tensor) {
+ // If n == 0, then the axis is the height
+ // If n == 1, then the axis is the width
+ for (size_t n = 0; n < padding_size_shape.dim(0); ++n)
+ {
+ const auto from = padding_size_base + (n * padding_size_shape.dim(1));
+ auto into = reinterpret_cast<int32_t *>(tensor.ptr_to_element({0, 1 - n}));
+ into[0] = from[0];
+ into[1] = from[1];
+ }
+ };
+ _builder.addInitializer(padding_size_index, initializer);
+ }
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ int block_size_index;
+ int padding_size_index;
+ int32_t rank;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size_index = block_size_index.asInt();
+ param.padding_size_index = padding_size_index.asInt();
+ param.rank = _ctx.at(input_index).shape().rank();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+ auto block_size_alloc = ctx.at(::internal::tflite::operand::Index{param.block_size_index});
+ auto padding_size_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_size_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(padding_size_alloc),
+ CAST_CL(output_alloc));
+ builder.append("SpaceToBatchND", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+ assert(_ctx.at(block_size_index).shape().rank() == 1);
+ assert(_ctx.at(block_size_index).hasData() == true);
+
+ const int32_t *block_size =
+ reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+
+ const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &block_size_shape = _ctx.at(block_size_index).shape();
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ output_index, asTensorInfo(asTensorShape(output_shape, false), _ctx.at(output_index).type(),
+ _ctx.at(output_index).scale(), _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(
+ input_index, asTensorInfo(asTensorShape(input_shape, false), _ctx.at(input_index).type(),
+ _ctx.at(input_index).scale(), _ctx.at(input_index).zeroPoint()));
+
+ _builder.addShapeConstr(block_size_index, asTensorInfo(asTensorShape(block_size_shape),
+ _ctx.at(block_size_index).type(),
+ _ctx.at(block_size_index).scale(),
+ _ctx.at(block_size_index).zeroPoint()));
+
+ // initializer for block_size
+ {
+ const auto block_size_base =
+ reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base());
+
+ assert(output_shape.dim(3) == input_shape.dim(3));
+ assert(output_shape.dim(1) == input_shape.dim(1) * block_size_base[0]);
+ assert(output_shape.dim(2) == input_shape.dim(2) * block_size_base[1]);
+ assert(output_shape.dim(0) == input_shape.dim(0) / (block_size_base[0] * block_size_base[1]));
+ assert(_ctx.at(block_size_index).type() == ANEURALNETWORKS_TENSOR_INT32);
+
+ assert((_ctx.at(block_size_index).data().size() / sizeof(int32_t)) == 2 &&
+ block_size_base[0] > 0 && block_size_base[1] > 0);
+
+ auto initializer = [block_size_base](::arm_compute::ITensor &tensor) {
+ const int32_t *from = reinterpret_cast<const int32_t *>(block_size_base);
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.ptr_to_element({0}));
+ into[0] = from[1];
+ into[1] = from[0];
+ };
+ _builder.addInitializer(block_size_index, initializer);
+ }
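+
+  // NOTE (illustrative): as in SpaceToBatchND, the NHWC {block_height, block_width} pair is
+  // written in reversed (x, y) order for ACL.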
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ int block_size_index;
+ const int32_t *block_size;
+ int32_t rank;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size_index = block_size_index.asInt();
+ param.block_size = block_size;
+ param.rank = _ctx.at(input_index).shape().rank();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+ auto block_size_alloc = ctx.at(::internal::tflite::operand::Index{param.block_size_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(output_alloc));
+ builder.append("BatchToSpaceND", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+  // {CL|Neon}L2Normalization performs the reduction only along dimension 0, while
+  // L2 Normalization always performs the reduction along the depth axis.
+  // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations
+  // by choosing the normalization parameters as follows.
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int32_t radius;
+ float alpha;
+ float beta;
+ float bias;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.radius = 2 * _ctx.at(ifm_index).shape().dim(3) + 1; // normSize = depth * 2 + 1
+  param.alpha = 1.0f;                                     // Chosen so that alpha_ in the implementation becomes 1
+ param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
+ param.bias = 0.0f; // Don't offset the reduction.
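+
+  // Illustrative derivation (assuming ACL's unscaled cross-map formula
+  // out = in / (bias + alpha * sum(in^2))^beta): with bias = 0, alpha = 1, beta = 0.5 and a
+  // window covering the whole depth, this reduces to in / sqrt(sum(in^2)), i.e. an L2
+  // normalization along the depth axis.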
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const auto norm_info =
+ ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, param.radius,
+ param.alpha, param.beta, param.bias, false);
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
+
+ builder.append("L2Normalize", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, norm_info);
+
+ builder.append("L2Normalize", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+ const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_index{node.param().padding_index};
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+
+ const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+ const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const PaddingCode padding_type =
+ static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>());
+
+ assert((ANEURALNETWORKS_PADDING_SAME == padding_type) ||
+ (ANEURALNETWORKS_PADDING_VALID == padding_type));
+
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ uint32_t kw;
+ uint32_t kh;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.kh = kh;
+ param.kw = kw;
+
+ param.stride.vertical = vstride;
+ param.stride.horizontal = hstride;
+
+ param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME)
+ ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh)
+ : valid_padding();
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::L2,
+ ::arm_compute::Size2D{param.kw, param.kh},
+ asPadStrideInfo(param.padding, param.stride)};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), info);
+
+ builder.append("L2Pool2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEPoolingLayer> fn{new ::arm_compute::NEPoolingLayer};
+
+ fn->configure(ifm_alloc, ofm_alloc, info);
+
+ builder.append("L2Pool2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ const ::internal::tflite::operand::Index kh_index{node.param().kh_index};
+ const ::internal::tflite::operand::Index kw_index{node.param().kw_index};
+
+ const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index};
+ const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index};
+
+ const ::internal::tflite::operand::Index padding_left_index{node.param().padding_left_index};
+ const ::internal::tflite::operand::Index padding_right_index{node.param().padding_right_index};
+ const ::internal::tflite::operand::Index padding_top_index{node.param().padding_top_index};
+ const ::internal::tflite::operand::Index padding_bottom_index{node.param().padding_bottom_index};
+
+ const ::internal::tflite::operand::Index activation_index{node.param().activation_index};
+
+ const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>();
+ const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>();
+
+ const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>();
+ const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>();
+
+ const int32_t padding_left = _ctx.at(padding_left_index).asScalar<int32_t>();
+ const int32_t padding_right = _ctx.at(padding_right_index).asScalar<int32_t>();
+ const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>();
+ const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>();
+
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+
+ uint32_t kw;
+ uint32_t kh;
+
+ Padding padding;
+ Stride stride;
+
+ FuseCode activation;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.kh = kh;
+ param.kw = kw;
+
+ param.stride.vertical = vstride;
+ param.stride.horizontal = hstride;
+
+ param.padding.left = padding_left;
+ param.padding.right = padding_right;
+ param.padding.top = padding_top;
+ param.padding.bottom = padding_bottom;
+
+ param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>());
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::L2,
+ ::arm_compute::Size2D{param.kw, param.kh},
+ asPadStrideInfo(param.padding, param.stride)};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ std::unique_ptr<::arm_compute::CLPoolingLayer> fn{new ::arm_compute::CLPoolingLayer};
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), info);
+
+ builder.append("L2Pool2D", std::move(fn));
+ }
+ else
+ {
+ std::unique_ptr<::arm_compute::NEPoolingLayer> fn{new ::arm_compute::NEPoolingLayer};
+
+ fn->configure(ifm_alloc, ofm_alloc, info);
+
+ builder.append("L2Pool2D", std::move(fn));
+ }
+
+ ActivationBuilder{builder}.append(param.activation, ofm_alloc);
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index lookups_index{node.param().lookups_index};
+ const ::internal::tflite::operand::Index values_index{node.param().values_index};
+
+ const auto &output_obj = _ctx.at(output_index);
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &values_obj = _ctx.at(values_index);
+
+  // Verify the operands here rather than in configure(), to avoid ACL modifying the
+  // TensorShape in some cases (Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
+ {
+ assert(lookups_obj.type() == ANEURALNETWORKS_TENSOR_INT32);
+
+ const auto &output_shape = output_obj.shape();
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &values_shape = values_obj.shape();
+
+ assert(lookups_shape.rank() == 1);
+ assert(values_shape.rank() >= 2);
+
+    // The output should be an n-D tensor with the same rank and shape as the values tensor,
+    // except for the first dimension, which has the same size as lookups' only dimension.
+ assert(output_shape.rank() == values_shape.rank());
+ assert(output_shape.dim(0) == lookups_shape.dim(0));
+ for (size_t n = 1; n < output_shape.rank(); ++n)
+ {
+ assert(output_shape.dim(n) == values_shape.dim(n));
+ }
+ }
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(output_obj.shape(), false), output_obj.type(),
+ output_obj.scale(), output_obj.zeroPoint()));
+ _builder.addShapeConstr(lookups_index,
+ asTensorInfo(asTensorShape(lookups_obj.shape()), lookups_obj.type(),
+ lookups_obj.scale(), lookups_obj.zeroPoint()));
+ _builder.addShapeConstr(values_index,
+ asTensorInfo(asTensorShape(values_obj.shape(), false), values_obj.type(),
+ values_obj.scale(), values_obj.zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int32_t output_index;
+ int32_t lookups_index;
+ int32_t values_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.lookups_index = lookups_index.asInt();
+ param.values_index = values_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index});
+ auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>();
+
+ fn->configure(CAST_CL(values_alloc), CAST_CL(output_alloc), CAST_CL(lookups_alloc));
+
+ builder.append("EmbeddingLookup", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::HashtableLookup::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index hits_index{node.param().hits_index};
+ const ::internal::tflite::operand::Index lookups_index{node.param().lookups_index};
+ const ::internal::tflite::operand::Index values_index{node.param().values_index};
+ const ::internal::tflite::operand::Index keys_index{node.param().keys_index};
+
+ const auto &lookups_obj = _ctx.at(lookups_index);
+ const auto &keys_obj = _ctx.at(keys_index);
+ const auto &hits_obj = _ctx.at(hits_index);
+ const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = _ctx.at(output_index);
+
+ assert(lookups_obj.type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(keys_obj.type() == ANEURALNETWORKS_TENSOR_INT32);
+ assert(hits_obj.type() == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM);
+
+ const auto &lookups_shape = lookups_obj.shape();
+ const auto &keys_shape = keys_obj.shape();
+ const auto &hits_shape = hits_obj.shape();
+ const auto &values_shape = values_obj.shape();
+ const auto &output_shape = output_obj.shape();
+
+ assert(values_shape.rank() == output_shape.rank());
+
+ assert(lookups_shape.rank() == 1);
+ assert(keys_shape.rank() == 1);
+ assert(values_shape.dim(0) == keys_shape.dim(0));
+ assert(lookups_shape.dim(0) == output_shape.dim(0));
+
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(hits_index,
+ asTensorInfo(asTensorShape(_ctx.at(hits_index).shape()),
+                                       _ctx.at(hits_index).type(), _ctx.at(hits_index).scale(),
+ _ctx.at(hits_index).zeroPoint()));
+
+ _builder.addShapeConstr(lookups_index, asTensorInfo(asTensorShape(_ctx.at(lookups_index).shape()),
+ _ctx.at(lookups_index).type(),
+ _ctx.at(lookups_index).scale(),
+ _ctx.at(lookups_index).zeroPoint()));
+ _builder.addShapeConstr(values_index,
+ asTensorInfo(asTensorShape(_ctx.at(values_index).shape()),
+ _ctx.at(values_index).type(), _ctx.at(values_index).scale(),
+ _ctx.at(values_index).zeroPoint()));
+ _builder.addShapeConstr(keys_index,
+ asTensorInfo(asTensorShape(_ctx.at(keys_index).shape()),
+ _ctx.at(keys_index).type(), _ctx.at(keys_index).scale(),
+ _ctx.at(keys_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int32_t output_index;
+ int32_t hits_index;
+ int32_t lookups_index;
+ int32_t values_index;
+ int32_t keys_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.hits_index = hits_index.asInt();
+ param.lookups_index = lookups_index.asInt();
+ param.values_index = values_index.asInt();
+ param.keys_index = keys_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto hits_alloc = ctx.at(::internal::tflite::operand::Index{param.hits_index});
+ auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index});
+ auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index});
+ auto keys_alloc = ctx.at(::internal::tflite::operand::Index{param.keys_index});
+
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>();
+
+ fn->configure(CAST_CL(lookups_alloc), CAST_CL(keys_alloc), CAST_CL(values_alloc),
+ CAST_CL(output_alloc), CAST_CL(hits_alloc));
+
+ builder.append("HashtableLookup", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index radius_index{node.param().radius_index};
+ const ::internal::tflite::operand::Index bias_index{node.param().bias_index};
+ const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index};
+ const ::internal::tflite::operand::Index beta_index{node.param().beta_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ int32_t radius;
+ float bias;
+ float alpha;
+ float beta;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ param.radius = _ctx.at(radius_index).asScalar<int32_t>();
+ param.alpha = _ctx.at(alpha_index).asScalar<float>();
+ param.beta = _ctx.at(beta_index).asScalar<float>();
+ param.bias = _ctx.at(bias_index).asScalar<float>();
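+
+  // NOTE (illustrative): NNAPI specifies a one-sided radius, while ACL's
+  // NormalizationLayerInfo takes the full window size, hence radius * 2 + 1 in the stage below.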
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
+ param.radius * 2 + 1, param.alpha,
+ param.beta, param.bias, false);
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info);
+
+ builder.append("LocalResponseNormalization", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc, norm_info);
+
+ builder.append("LocalResponseNormalization", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+ const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index};
+
+ assert(_ctx.at(input_index).shape().rank() == 4);
+ assert(_ctx.at(output_index).shape().rank() == 4);
+
+ int32_t block_size = _ctx.at(block_size_index).asScalar<int32_t>();
+ assert(block_size > 0);
+
+ { // assertions block
+ const auto output_shape = _ctx.at(output_index).shape();
+ const auto input_shape = _ctx.at(input_index).shape();
+ assert(output_shape.dim(0) == input_shape.dim(0));
+ assert(output_shape.dim(1) == input_shape.dim(1) * block_size);
+ assert(output_shape.dim(2) == input_shape.dim(2) * block_size);
+ assert(input_shape.dim(3) % (block_size * block_size) == 0);
+ assert(output_shape.dim(3) == input_shape.dim(3) / (block_size * block_size));
+ }
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false),
+ _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ int32_t block_size;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ param.block_size = block_size;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ {
+ if (::internal::arm_compute::isGpuMode()) // GPU
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size);
+
+ builder.append("DepthToSpace", std::move(fn));
+ }
+ else // NEON
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Unpack::Node &node)
+{
+ VERBOSE(Unpack) << "Configure Unpack operation" << std::endl;
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ uint32_t input_rank = _ctx.at(ifm_index).shape().rank();
+
+ assert(input_rank == 4 || input_rank == 3 || input_rank == 2);
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
+ _ctx.at(ifm_index).type()));
+
+ int32_t axis =
+ _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>();
+  // Negative axis is supported; -1 implies the (R-1)-th axis, where R is the input rank
+ if (axis < 0)
+ {
+ axis += input_rank;
+ assert(axis >= 0);
+ }
+ uint32_t axis_uint = ToARMComputeAxis(input_rank, axis).value();
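+
+  // e.g. for a rank-3 input with axis = -1: axis becomes 2, which maps to ACL axis
+  // ToARMComputeAxis(3, 2) = 3 - 2 - 1 = 0 (assuming the usual axis reversal).
+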
+ // int32_t num_split =
+ // _ctx.at(::internal::tflite::operand::Index{node.param().num_split_index}).asScalar<int32_t>();
+
+ for (const auto &index : node.param().ofm_indexes)
+ {
+ const ::internal::tflite::operand::Index ofm_index{index};
+ _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
+ _ctx.at(ofm_index).type()));
+ }
+
+ struct Param
+ {
+ std::vector<int32_t> ofm_indexes;
+ int ifm_index;
+ uint32_t axis;
+ };
+
+ if (input_rank == 4)
+ {
+    // TODO: generate a test case for this and generalize the 4D method to all cases.
+ throw std::runtime_error("UNPACK_4D not implemented");
+ }
+ else if (input_rank == 3)
+ {
+ Param param;
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis_uint;
+ for (const auto &index : node.param().ofm_indexes)
+ {
+ param.ofm_indexes.push_back(index);
+ }
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLUnstack>();
+ std::vector<::arm_compute::ICLTensor *> outputs;
+ for (const auto &index : param.ofm_indexes)
+ {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{index});
+ outputs.push_back(CAST_CL(output_alloc));
+ }
+ fn->configure(CAST_CL(input_alloc), outputs, param.axis);
+
+ builder.append("Unpack", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+ }
+ else if (input_rank == 2)
+ {
+ throw std::runtime_error("UNPACK_2D not implemented");
+ }
+ else
+ {
+ throw std::runtime_error("UNPACK axis is not valid");
+ }
+}
+
+void Planner::visit(const ::internal::tflite::op::Pack::Node &node)
+{
+ VERBOSE(Pack) << "Configure Pack operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const uint32_t output_rank = _ctx.at(ofm_index).shape().rank();
+ const uint32_t input_rank = output_rank - 1;
+
+ assert(output_rank == 4 || output_rank == 3 || output_rank == 2);
+
+ for (const auto &index : node.param().ifm_indexes)
+ {
+ const ::internal::tflite::operand::Index ifm_index{index};
+ assert(_ctx.at(ifm_index).shape().rank() == input_rank);
+ _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()),
+ _ctx.at(ifm_index).type()));
+ }
+
+ _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()),
+ _ctx.at(ofm_index).type()));
+
+ int32_t axis =
+ _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>();
+  // A negative axis implies an axis from the end.
+  // For example, axis = -1 implies the first axis from the end, i.e. axis = Rank - 1.
+  // Similarly, axis = -2 implies the second axis from the end, i.e. axis = Rank - 2.
+ if (axis < 0)
+ {
+ axis += output_rank;
+ assert(axis >= 0);
+ }
+ uint32_t axis_uint = ToARMComputeAxis(output_rank, axis).value();
+
+ struct Param
+ {
+ std::vector<int32_t> ifm_indexes;
+ int ofm_index;
+ uint32_t axis;
+ };
+
+ if (input_rank == 3)
+ {
+    // TODO: generate a test case for this and generalize the 4D method to all cases.
+ throw std::runtime_error("PACK_3D not implemented");
+ }
+ else if (input_rank == 2)
+ {
+ Param param;
+ param.ofm_index = ofm_index.asInt();
+ param.axis = axis_uint;
+
+ for (const auto &index : node.param().ifm_indexes)
+ {
+ param.ifm_indexes.push_back(index);
+ }
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStackLayer>();
+ std::vector<::arm_compute::ICLTensor *> inputs;
+ for (const auto &index : param.ifm_indexes)
+ {
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{index});
+ inputs.push_back(CAST_CL(input_alloc));
+ }
+ fn->configure(inputs, param.axis, CAST_CL(output_alloc));
+
+ builder.append("Pack", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+ }
+ else if (input_rank == 1)
+ {
+ throw std::runtime_error("PACK_1D not implemented");
+ }
+ else
+ {
+ throw std::runtime_error("PACK axis is not valid");
+ }
+}
+
+void Planner::visit(const ::internal::tflite::op::Neg::Node &node)
+{
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
+ builder.append("Neg", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
+  };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Exp::Node &node)
+{
+ VERBOSE(Exp) << "Configure Exp operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExpLayer>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc));
+
+ builder.append("Exp", std::move(fn));
+ }
+ else
+ {
+ throw std::runtime_error("Not supported");
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node)
+{
+ VERBOSE(ReduceSum) << "Configure ReduceSum operation" << std::endl;
+
+ const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index};
+ const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index};
+ const ::internal::tflite::operand::Index axis_index{node.param().axis_index};
+
+ const auto ifm_shape = _ctx.at(ifm_index).shape();
+ const auto ofm_shape = _ctx.at(ofm_index).shape();
+ const auto axis_shape = _ctx.at(axis_index).shape();
+
+ assert(ifm_shape.rank() <= 4);
+ assert(ofm_shape.rank() <= ifm_shape.rank());
+ assert(_ctx.at(axis_index).hasData());
+ assert(axis_shape.rank() == 0 || axis_shape.rank() == 1);
+
+  // NOTE For 4-dimensional inputs, if the ranks of input and output differ, this runtime only
+  // supports reducing height and width, or reducing depth.
+  // TODO We have to support all reduction cases for dimensions up to 4.
+  // For correct permuting, the output's shape would have to keep each dimension at the same
+  // position as in the input, but the positions of matching dimensions in the input and
+  // output may differ. For example, an input of shape {2,3,4,5} can be reduced to an output
+  // of shape {3,5}. The position-preserving output shape would be {1,3,1,5}, but the real
+  // output shape may be {3,5}; naively extending it to 4 dimensions yields {1,1,3,5} instead.
+  // Even if the output shape were changed to {1,3,1,5}, another problem remains: the shape of
+  // the output tensor seen by the next operation would become {1,3,1,5} after this operation,
+  // even when the next operation does not expect it.
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank())
+ {
+ if (ofm_shape.rank() == 2)
+ {
+ // Reducing HW
+ assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1));
+ }
+ else if (ofm_shape.rank() == 3)
+ {
+ // Reducing C or
+ // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1)
+      assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) &&
+              ifm_shape.dim(2) == ofm_shape.dim(2)) ||
+             (ifm_shape.dim(0) == ofm_shape.dim(0) &&
+              (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) &&
+              ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1));
+ }
+ }
+
+ // Set shape constraints
+ _builder.addShapeConstr(
+ ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(),
+ _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint()));
+ _builder.addShapeConstr(
+ ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(),
+ _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint()));
+
+ uint32_t input_rank = ifm_shape.rank();
+ std::set<uint32_t> axis;
+ int32_t axis_rank = axis_shape.rank();
+
+ if (axis_rank == 0)
+ {
+ int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>();
+ if (axis_value < 0)
+ {
+ axis_value += input_rank;
+ }
+ axis.insert(ToARMComputeAxis(input_rank, axis_value).value());
+ }
+ else if (axis_rank == 1)
+ {
+ const auto axis_base = _ctx.at(axis_index).data().base();
+ const auto axis_size = _ctx.at(axis_index).shape().asVector();
+
+    // If the axis values are not available as constant data but only as input data, we would
+    // have to find a way to infer the output shape when sinking the output.
+ assert(axis_base != nullptr);
+ for (uint32_t n = 0; n < axis_size; ++n)
+ {
+ int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n);
+ if (axis_value < 0)
+ {
+ axis_value += input_rank;
+ }
+ axis.insert(ToARMComputeAxis(input_rank, axis_value).value());
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Not supported axis");
+ }
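+
+  // e.g. with input_rank = 4 and axis data {-1, 1}: -1 becomes 3 (ACL axis 0) and 1 maps to
+  // ACL axis 2, so the reduction set passed to the stage below is {0, 2}.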
+
+ struct Param
+ {
+ int ofm_index;
+ int ifm_index;
+ std::set<uint32_t> axis;
+ };
+
+ Param param;
+
+ param.ofm_index = ofm_index.asInt();
+ param.ifm_index = ifm_index.asInt();
+ param.axis = axis;
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index});
+ auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>();
+
+ fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis,
+ ::arm_compute::ReduceOperation::SUM);
+
+ builder.append("ReduceSum", std::move(fn));
+ }
+ else
+ throw std::runtime_error("Not supported, yet");
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::Abs::Node &node)
+{
+  VERBOSE(Abs) << "Configure Abs operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set shape constraints
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+                          asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+                                       _ctx.at(input_index).type(), _ctx.at(input_index).scale(),
+                                       _ctx.at(input_index).zeroPoint()));
+
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info);
+
+ builder.append("Abs", std::move(fn));
+ }
+ else
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
+
+ fn->configure(input_alloc, output_alloc, act_info);
+
+ builder.append("Abs", std::move(fn));
+ }
+ };
+
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::NotEqual::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparison>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::ComparisonOperation::NotEqual);
+
+ builder.append("NotEqual", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
+}
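+
+// NOTE (illustrative sketch): extendRank above equalizes operand ranks before
+// broadcasting; it is assumed here to prepend size-1 dimensions, NumPy-style.
+// For example, comparing shapes [3] and [2, 3]:
+//
+//   broadcast_rank = std::max(1, 2);   // 2
+//   input1: [3]    -> extendRank(2) -> [1, 3]
+//   input2: [2, 3] -> extendRank(2) -> [2, 3] (unchanged)
+//
+// ACL then broadcasts the size-1 dimension when the comparison kernel is
+// configured.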
+
+void Planner::visit(const ::internal::tflite::op::LogicalAnd::Node &node)
+{
+ VERBOSE(LogicalAnd) << "Configure LogicalAnd operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::BinaryLogicalOperation::AND);
+
+ builder.append("LogicalAnd", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LogicalNot::Node &node)
+{
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input_index{node.param().input_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ ::arm_compute::DataType::U8, _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ _builder.addShapeConstr(input_index,
+ asTensorInfo(asTensorShape(_ctx.at(input_index).shape()),
+ ::arm_compute::DataType::U8, _ctx.at(input_index).scale(),
+ _ctx.at(input_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input_index = input_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index});
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>();
+
+ fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc));
+
+ builder.append("LogicalNot", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+void Planner::visit(const ::internal::tflite::op::LogicalOr::Node &node)
+{
+ VERBOSE(LogicalOr) << "Configure LogicalOr operation" << std::endl;
+
+ const ::internal::tflite::operand::Index output_index{node.param().output_index};
+ const ::internal::tflite::operand::Index input1_index{node.param().input1_index};
+ const ::internal::tflite::operand::Index input2_index{node.param().input2_index};
+
+ // Set Shape Constraints and TensorInfo
+ _builder.addShapeConstr(output_index,
+ asTensorInfo(asTensorShape(_ctx.at(output_index).shape()),
+ _ctx.at(output_index).type(), _ctx.at(output_index).scale(),
+ _ctx.at(output_index).zeroPoint()));
+
+ if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape()))
+ {
+ const auto broadcast_rank =
+ std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank());
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape())
+ .extendRank(broadcast_rank);
+ const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape())
+ .extendRank(broadcast_rank);
+ }
+
+ _builder.addShapeConstr(input1_index,
+ asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()),
+ _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(),
+ _ctx.at(input1_index).zeroPoint()));
+ _builder.addShapeConstr(input2_index,
+ asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()),
+ _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(),
+ _ctx.at(input2_index).zeroPoint()));
+
+ // Construct operation parameters
+ struct Param
+ {
+ int output_index;
+ int input1_index;
+ int input2_index;
+ };
+
+ Param param;
+
+ param.output_index = output_index.asInt();
+ param.input1_index = input1_index.asInt();
+ param.input2_index = input2_index.asInt();
+ auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) {
+ auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index});
+ auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index});
+ auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index});
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>();
+
+ fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc),
+ ::arm_compute::BinaryLogicalOperation::OR);
+
+ builder.append("LogicalOr", std::move(fn));
+ }
+ else
+ {
+ // TODO Add NEON support
+
+ throw std::runtime_error("Not supported yet");
+ }
+ };
+ _builder.addStage(stage);
+}
+
+class AllocationContext final : public IAllocationContext
+{
+public:
+ AllocationContext(::internal::arm_compute::Plan &plan) : _plan{plan}
+ {
+ // DO NOTHING
+ }
+
+public:
+ ::arm_compute::ITensor *at(const ::internal::tflite::operand::Index &ind) const override
+ {
+ return _plan.operands().at(ind).ptr();
+ }
+
+private:
+ ::internal::arm_compute::Plan &_plan;
+};
+
+class ExecutionBuilder final : public IExecutionBuilder
+{
+public:
+ ExecutionBuilder(::internal::arm_compute::Plan &plan) : _plan{plan}
+ {
+ // DO NOTHING
+ }
+
+public:
+ void append(const std::string &name, std::unique_ptr<::arm_compute::IFunction> &&f) override
+ {
+ _plan.operations().append(std::move(f));
+ _plan.operations().at(_plan.operations().size() - 1).name() = name;
+ }
+
+#ifdef TFLITE_PROFILING_ENABLED
+public:
+ int plan_op_size() const { return _plan.operations().size(); }
+ void addOpIndexToSteps(int from, int to, int op_idx)
+ {
+ for (int i = from; i < to; ++i)
+ _plan.operations().at(i).op_idx() = op_idx;
+ }
+#endif
+
+private:
+ ::internal::arm_compute::Plan &_plan;
+};
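+
+// NOTE (illustrative usage): ExecutionBuilder::append stores the ACL function
+// in the plan and then names the entry it just appended:
+//
+//   ExecutionBuilder builder{plan};
+//   builder.append("ReduceSum", std::move(fn));
+//   // now: plan.operations().at(plan.operations().size() - 1).name() == "ReduceSum"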
+
+/**
+ * @brief Class to provide methods of compilation plan builder
+ */
+class PlanBuilder final : public IPlanBuilder
+{
+public:
+ /**
+ * @brief Construct a new PlanBuilder object with Plan
+ * @param [in] plan The Plan object
+ */
+ PlanBuilder(::internal::arm_compute::Plan &plan) : _plan{plan}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Add TensorInfo with Shape Constraints
+ * @param [in] ind Index of operand
+ * @param [in] info TensorInfo value to set to index of operand
+ * @return N/A
+ */
+ void addShapeConstr(const ::internal::tflite::operand::Index &ind,
+ const ::arm_compute::TensorInfo &info) override;
+
+public:
+ /**
+ * @brief Add Subsumption constraints
+ * @param [in] ind Index of operand
+ * @param [in] base Index of base operand of Subsumption
+ * @param [in] offset Offset of Subsumption
+ * @param [in] shape Shape of Subsumption
+ * @param [in] extend_parent extend_parent value of Subsumption
+ * @return N/A
+ */
+ void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind,
+ const ::internal::tflite::operand::Index &base,
+ const ::arm_compute::Coordinates &offset,
+ const ::arm_compute::TensorShape &shape, bool extend_parent) override;
+
+public:
+ /**
+ * @brief Add Initializer lambda with ITensor param
+ * @param [in] ind Index of operand
+ * @param [in] initializer Initializer to add
+ * @return N/A
+ */
+ void addInitializer(const ::internal::tflite::operand::Index &ind,
+ const Initializer &initializer) override;
+
+public:
+ /**
+ * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params
+ * @param [in] stage Stage to add
+ * @return N/A
+ */
+ void addStage(const Stage &stage) override;
+
+public:
+ /**
+ * @brief Finalize (build) the Plan
+ * @return N/A
+ */
+ void finalize(void) const;
+
+private:
+ ::internal::arm_compute::Plan &_plan;
+
+private:
+ struct Subsumption
+ {
+ public:
+ Subsumption(const ::internal::tflite::operand::Index &base,
+ const ::arm_compute::Coordinates &offset, const ::arm_compute::TensorShape &shape,
+ bool extend_parent)
+ : _base{base}, _offset{offset}, _shape{shape}, _extend_parent{extend_parent}
+ {
+ // DO NOTHING
+ }
+
+ public:
+ const ::internal::tflite::operand::Index &base(void) const { return _base; }
+ const ::arm_compute::Coordinates &offset(void) const { return _offset; }
+ const ::arm_compute::TensorShape &shape(void) const { return _shape; }
+ bool extend_parent(void) const { return _extend_parent; }
+
+ private:
+ const ::internal::tflite::operand::Index _base;
+ const ::arm_compute::Coordinates _offset;
+ const ::arm_compute::TensorShape _shape;
+ const bool _extend_parent;
+ };
+
+private:
+ std::map<int, ::arm_compute::TensorInfo> _tensor_info_ctx;
+ std::map<int, std::shared_ptr<Subsumption>> _subsumption_ctx;
+ std::map<int, Initializer> _initializer_ctx;
+ std::vector<Stage> _stages;
+};
+
+void PlanBuilder::addShapeConstr(const ::internal::tflite::operand::Index &ind,
+ const ::arm_compute::TensorInfo &info)
+{
+ _tensor_info_ctx[ind.asInt()] = info;
+}
+
+void PlanBuilder::addSubsumptionConstr(const ::internal::tflite::operand::Index &ind,
+ const ::internal::tflite::operand::Index &base,
+ const ::arm_compute::Coordinates &offset,
+ const ::arm_compute::TensorShape &shape, bool extend_parent)
+{
+ _subsumption_ctx[ind.asInt()] = std::make_shared<Subsumption>(base, offset, shape, extend_parent);
+}
+
+void PlanBuilder::addInitializer(const ::internal::tflite::operand::Index &ind,
+ const Initializer &initializer)
+{
+ _initializer_ctx[ind.asInt()] = initializer;
+}
+
+void PlanBuilder::addStage(const Stage &stage) { _stages.emplace_back(stage); }
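+
+// NOTE (illustrative sketch, operand indices hypothetical): a Stage is a
+// deferred configure step. A minimal stage registration looks like:
+//
+//   auto stage = [](const IAllocationContext &ctx, IExecutionBuilder &exec) {
+//     auto ofm = ctx.at(::internal::tflite::operand::Index{1});
+//     auto ifm = ctx.at(::internal::tflite::operand::Index{0});
+//     auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBitwiseNot>();
+//     fn->configure(CAST_CL(ifm), CAST_CL(ofm));
+//     exec.append("BitwiseNot", std::move(fn));
+//   };
+//   plan_builder.addStage(stage);
+//
+// Stages run only inside finalize(), after every tensor has been created.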
+
+#include <stack>
+
+void PlanBuilder::finalize(void) const
+{
+ // ITensor objects to be initialized later
+ std::vector<std::shared_ptr<::arm_compute::ITensor>> tensors;
+
+ // Create Tensor & CLSubTensor
+ auto isAllocated = [this](int ind) {
+ const ::internal::tflite::operand::Index operand_index{ind};
+ return _plan.operands().exist(operand_index);
+ };
+
+ auto setCLTensor = [&](int ind) {
+ auto tensor = std::make_shared<::arm_compute::CLTensor>();
+
+ tensor->allocator()->init(_tensor_info_ctx.at(ind));
+
+ // NOTE Do NOT allocate here. allocate() must be invoked after the configure functions
+ _plan.operands().set(::internal::tflite::operand::Index{ind}, tensor);
+ tensors.emplace_back(tensor);
+ };
+
+ auto setCLSubTensor = [&](int curr) {
+ const auto &sub_info = *(_subsumption_ctx.find(curr)->second);
+
+ auto base_tensor = _plan.operands().at(sub_info.base()).ptr();
+
+ assert(base_tensor != nullptr);
+
+ auto curr_tensor = std::make_shared<::arm_compute::CLSubTensor>(
+ CAST_CL(base_tensor), sub_info.shape(), sub_info.offset(), sub_info.extend_parent());
+
+ _plan.operands().set(::internal::tflite::operand::Index{curr}, curr_tensor);
+ };
+
+ auto setNETensor = [&](int ind) {
+ auto tensor = std::make_shared<::arm_compute::Tensor>();
+
+ tensor->allocator()->init(_tensor_info_ctx.at(ind));
+
+ // NOTE Do NOT allocate here. allocate() must be invoked after the configure functions
+ _plan.operands().set(::internal::tflite::operand::Index{ind}, tensor);
+ tensors.emplace_back(tensor);
+ };
+
+ auto setNESubTensor = [&](int curr) {
+ const auto &sub_info = *(_subsumption_ctx.find(curr)->second);
+
+ auto base_tensor = _plan.operands().at(sub_info.base()).ptr();
+
+ assert(base_tensor != nullptr);
+
+ auto curr_tensor = std::make_shared<::arm_compute::SubTensor>(base_tensor, sub_info.shape(),
+ sub_info.offset());
+
+ _plan.operands().set(::internal::tflite::operand::Index{curr}, curr_tensor);
+ };
+
+ for (auto it = _subsumption_ctx.begin(); it != _subsumption_ctx.end(); ++it)
+ {
+ std::stack<int> stack;
+
+ stack.push(it->first);
+
+ while (!stack.empty())
+ {
+ const auto curr = stack.top();
+
+ if (isAllocated(curr))
+ {
+ // Skip if already allocated
+ stack.pop();
+ continue;
+ }
+
+ auto it_s = _subsumption_ctx.find(curr);
+
+ if (it_s == _subsumption_ctx.end())
+ {
+ if (::internal::arm_compute::isGpuMode())
+ setCLTensor(curr);
+ else
+ setNETensor(curr);
+ stack.pop();
+ continue;
+ }
+
+ const auto &sub_info = *(it_s->second);
+
+ if (isAllocated(sub_info.base().asInt()))
+ {
+ if (::internal::arm_compute::isGpuMode())
+ setCLSubTensor(curr);
+ else
+ setNESubTensor(curr);
+ stack.pop();
+ }
+ else
+ {
+ // Allocate base tensor first
+ stack.push(sub_info.base().asInt());
+ }
+ }
+ }
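+
+ // NOTE (illustrative walk-through): the stack above is an iterative DFS that
+ // creates a sub-tensor's base before the sub-tensor itself. For a chain where
+ // operand 5 subsumes into 3 and 3 subsumes into 1:
+ //
+ //   top=5: base 3 not allocated yet -> push(3)
+ //   top=3: base 1 not allocated yet -> push(1)
+ //   top=1: no subsumption entry     -> setCLTensor(1), pop
+ //   top=3: base 1 is allocated      -> setCLSubTensor(3), pop
+ //   top=5: base 3 is allocated      -> setCLSubTensor(5), pop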
+
+ for (auto it = _tensor_info_ctx.begin(); it != _tensor_info_ctx.end(); ++it)
+ {
+ if (isAllocated(it->first))
+ {
+ // Skip if already allocated
+ continue;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ setCLTensor(it->first);
+ else
+ setNETensor(it->first);
+ }
+
+ // Process Stage
+ AllocationContext allocation_context{_plan};
+ ExecutionBuilder execution_builder{_plan};
+
+ for (size_t idx = 0; idx < _stages.size(); ++idx)
+ {
+ const auto &stage = _stages[idx];
+#ifdef TFLITE_PROFILING_ENABLED
+ int from = execution_builder.plan_op_size();
+#endif
+ stage(allocation_context, execution_builder);
+#ifdef TFLITE_PROFILING_ENABLED
+ int to = execution_builder.plan_op_size();
+ execution_builder.addOpIndexToSteps(from, to, idx);
+#endif
+ }
+
+ // Allocate Tensor Memory
+ for (const auto &tensor : tensors)
+ {
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto cl_tensor = CAST_CL(tensor.get());
+ cl_tensor->allocator()->allocate();
+ }
+ else
+ {
+ auto ne_tensor = CAST_NE(tensor.get());
+ ne_tensor->allocator()->allocate();
+ }
+ }
+
+ // Fill weight/bias
+ for (auto it = _initializer_ctx.begin(); it != _initializer_ctx.end(); ++it)
+ {
+ const ::internal::tflite::operand::Index operand_index{it->first};
+ _plan.operands().at(operand_index).access(it->second);
+ }
+
+ // Initialize CLTensors that have data in their corresponding NNAPI operand but are not
+ // initialized yet
+ const auto &operands = _plan.model().operands();
+ for (int idx = 0; idx < operands.size(); ++idx)
+ {
+ const ::internal::tflite::operand::Index operand_idx{idx};
+ if (isAllocated(idx) && operands.at(operand_idx).hasData() &&
+ _initializer_ctx.find(idx) == _initializer_ctx.end())
+ {
+ auto rank = operands.at(operand_idx).shape().rank();
+ auto base = operands.at(operand_idx).data().base();
+ auto type = operands.at(operand_idx).type();
+ auto shape = operands.at(operand_idx).shape();
+
+ // Scalar types (ANEURALNETWORKS_FLOAT32 and ANEURALNETWORKS_INT32) must also be supported
+ // for tensors of rank > 1, because such operands can appear in broadcast operations
+ switch (rank)
+ {
+ case 0: // scalar
+ {
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initVectorTensor<float>, _1, base, 1);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = std::bind(initVectorTensor<int32_t>, _1, base, 1);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_UINT32:
+ {
+ auto initializer = std::bind(initVectorTensor<uint32_t>, _1, base, 1);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer = std::bind(initVectorTensor<uint8_t>, _1, base, 1);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown scalar type, type : " + std::to_string(type));
+ break;
+ }
+ break;
+ }
+ case 1: // vector
+ {
+ auto size = shape.asVector();
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initVectorTensor<float>, _1, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = std::bind(initVectorTensor<int32_t>, _1, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer = std::bind(initVectorTensor<uint8_t>, _1, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown tensor type, type : " + std::to_string(type));
+ break;
+ }
+ break;
+ }
+ case 2: // matrix
+ {
+ const auto matrix_shape = shape.asMatrix();
+ auto size = operands.at(operand_idx).data().size();
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initMatrixTensor<float>, _1, matrix_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = std::bind(initMatrixTensor<int32_t>, _1, matrix_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer = std::bind(initMatrixTensor<uint8_t>, _1, matrix_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown tensor type, type : " + std::to_string(type));
+ break;
+ }
+ break;
+ }
+ case 3: // 3D tensor
+ {
+ const auto tensor_shape = shape.asTensor();
+ auto size = operands.at(operand_idx).data().size();
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initTensor3D<float>, _1, tensor_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer = std::bind(initTensor3D<int32_t>, _1, tensor_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer = std::bind(initTensor3D<uint8_t>, _1, tensor_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown tensor type, type : " + std::to_string(type));
+ break;
+ }
+ break;
+ }
+ case 4: // feature
+ {
+ const auto feature_shape = shape.asFeature();
+ auto size = operands.at(operand_idx).data().size();
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ {
+ auto initializer = std::bind(initFeatureTensor<float>, _1, feature_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ {
+ auto initializer =
+ std::bind(initFeatureTensor<int32_t>, _1, feature_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ {
+ auto initializer =
+ std::bind(initFeatureTensor<uint8_t>, _1, feature_shape, base, size);
+ _plan.operands().at(operand_idx).access(initializer);
+ break;
+ }
+ default:
+ throw std::runtime_error("Unknown tensor type, type : " + std::to_string(type));
+ break;
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+ }
+ }
+}
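+
+// NOTE (illustrative sketch): the initializers above bind an operand's constant
+// data into a deferred writer. For a rank-1 float operand:
+//
+//   auto initializer = std::bind(initVectorTensor<float>, _1, base, size);
+//   _plan.operands().at(operand_idx).access(initializer);
+//
+// access() later invokes the initializer with the backing ITensor, copying the
+// vector contents from `base` into it.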
+
+//
+// NNAPI Implementation
+//
+int ANeuralNetworksCompilation_create(ANeuralNetworksModel *model,
+ ANeuralNetworksCompilation **compilation)
+{
+ if ((model == nullptr) || (compilation == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (!model->isFinished())
+ {
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ std::shared_ptr<const internal::tflite::Model> internal;
+
+ model->release(internal);
+
+ ANeuralNetworksCompilation *compilation_ptr = new ANeuralNetworksCompilation(internal);
+ if (compilation_ptr == nullptr)
+ {
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+ *compilation = compilation_ptr;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation *compilation,
+ int32_t preference)
+{
+ if (compilation == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ // NOTE The pure CL runtime currently ignores this API call
+ // TODO Use preference
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation)
+{
+ if (compilation == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (::internal::arm_compute::isGpuMode())
+ {
+ arm_compute::CLScheduler::get().default_init();
+ // NOTE CLKernelLibraryEx must use the same context as CLScheduler
+ // This does not check whether another device is available.
+ arm_compute::CLKernelLibraryEx::get().init(
+ "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault());
+ }
+
+ const auto &operands = compilation->plan().model().operands();
+ const auto &operations = compilation->plan().model().operations();
+
+ PlanBuilder plan_builder{compilation->plan()};
+
+ for (uint32_t n = 0; n < operations.size(); ++n)
+ {
+ operations.at(n).accept(Planner{operands, plan_builder});
+ }
+
+ plan_builder.finalize();
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation *compilation)
+{
+ delete compilation;
+}
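+
+// NOTE (illustrative usage, error handling elided): a typical compilation
+// sequence over a finished model:
+//
+//   ANeuralNetworksCompilation *compilation = nullptr;
+//   ANeuralNetworksCompilation_create(model, &compilation);
+//   ANeuralNetworksCompilation_setPreference(compilation,
+//                                            ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
+//   ANeuralNetworksCompilation_finish(compilation); // plans and finalizes
+//   // ... create and run executions ...
+//   ANeuralNetworksCompilation_free(compilation);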
diff --git a/runtime/contrib/pure_arm_compute/src/compilation.h b/runtime/contrib/pure_arm_compute/src/compilation.h
new file mode 100644
index 000000000..1a06d06b9
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/compilation.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file compilation.h
+ * @brief This file defines ANeuralNetworksCompilation class for handling Compilation NNAPI
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __COMPILATION_H__
+#define __COMPILATION_H__
+
+#include "internal/Model.h"
+#include "internal/arm_compute.h"
+
+/**
+ * @brief struct to define Compilation of NNAPI
+ */
+struct ANeuralNetworksCompilation
+{
+public:
+ /**
+ * @brief Construct with params
+ * @param [in] model Pointer of internal::tflite::Model to set internal::arm_compute::Plan
+ */
+ ANeuralNetworksCompilation(const std::shared_ptr<const internal::tflite::Model> &model)
+ : _plan{new internal::arm_compute::Plan{model}}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get reference of internal::arm_compute::Plan
+ * @return Reference of internal::arm_compute::Plan
+ */
+ internal::arm_compute::Plan &plan(void) { return *_plan; }
+
+public:
+ /**
+ * @brief Publish internal Plan to param
+ * @param [out] plan Pointer of internal::arm_compute::Plan to be set
+ * @return N/A
+ */
+ void publish(std::shared_ptr<const internal::arm_compute::Plan> &plan) { plan = _plan; }
+ /**
+ * @brief Check whether ANeuralNetworksCompilation_finish has been called
+ * @return @c true if ANeuralNetworksCompilation_finish has been called, otherwise @c false
+ */
+ bool isFinished(void) { return _isFinished; }
+ /**
+ * @brief Mark compilation process finished
+ * @return N/A
+ */
+ void markAsFinished() { _isFinished = true; }
+
+private:
+ std::shared_ptr<internal::arm_compute::Plan> _plan;
+ bool _isFinished{false};
+};
+
+#endif
diff --git a/runtime/contrib/pure_arm_compute/src/event.cc b/runtime/contrib/pure_arm_compute/src/event.cc
new file mode 100644
index 000000000..247586cc3
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/event.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+
+#include "event.h"
+
+int ANeuralNetworksEvent_wait(ANeuralNetworksEvent *event)
+{
+ if (event == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksEvent_free(ANeuralNetworksEvent *event) { delete event; }
diff --git a/runtime/contrib/pure_arm_compute/src/event.h b/runtime/contrib/pure_arm_compute/src/event.h
new file mode 100644
index 000000000..b5595583c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/event.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file event.h
+ * @brief This file defines ANeuralNetworksEvent struct for handling Event NNAPI
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __EVENT_H__
+#define __EVENT_H__
+
+/**
+ * @brief struct to define Event of NNAPI
+ */
+struct ANeuralNetworksEvent
+{
+};
+
+#endif
diff --git a/runtime/contrib/pure_arm_compute/src/execution.cc b/runtime/contrib/pure_arm_compute/src/execution.cc
new file mode 100644
index 000000000..657d25288
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/execution.cc
@@ -0,0 +1,628 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+
+#include "compilation.h"
+#include "execution.h"
+#include "profiling/profiling.h"
+#include "profiling/profiler.h"
+#include "event.h"
+
+#include "internal/VectorSource.h"
+#include "internal/MatrixSource.h"
+#include "internal/Tensor3DSource.h"
+#include "internal/FeatureSource.h"
+#include "internal/TensorSource.h"
+
+#include "internal/Sinks.h"
+#include "internal/VectorSink.h"
+#include "internal/MatrixSink.h"
+#include "internal/Tensor3DSink.h"
+#include "internal/FeatureSink.h"
+
+#include "misc/feature/IndexIterator.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <cassert>
+
+static void asVectorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ int32_t len, const void *buffer, size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->source<VectorSource<float>>(index, len, reinterpret_cast<const float *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->source<VectorSource<int32_t>>(index, len,
+ reinterpret_cast<const int32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->source<VectorSource<uint32_t>>(index, len,
+ reinterpret_cast<const uint32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->source<VectorSource<uint8_t>>(index, len,
+ reinterpret_cast<const uint8_t *>(buffer), length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::matrix::Shape &shape, const void *buffer,
+ size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->source<MatrixSource<float>>(index, shape, reinterpret_cast<const float *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->source<MatrixSource<int32_t>>(index, shape,
+ reinterpret_cast<const int32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->source<MatrixSource<uint32_t>>(index, shape,
+ reinterpret_cast<const uint32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->source<MatrixSource<uint8_t>>(index, shape,
+ reinterpret_cast<const uint8_t *>(buffer), length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::tensor::Shape &shape, const void *buffer,
+ size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->source<Tensor3DSource<float>>(index, shape,
+ reinterpret_cast<const float *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->source<Tensor3DSource<int32_t>>(index, shape,
+ reinterpret_cast<const int32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->source<Tensor3DSource<uint32_t>>(
+ index, shape, reinterpret_cast<const uint32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->source<Tensor3DSource<uint8_t>>(index, shape,
+ reinterpret_cast<const uint8_t *>(buffer), length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::tensor::Shape &shape, const void *buffer,
+ size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->source<TensorSource<float>>(index, shape, reinterpret_cast<const float *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->source<TensorSource<int32_t>>(index, shape,
+ reinterpret_cast<const int32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->source<TensorSource<uint32_t>>(index, shape,
+ reinterpret_cast<const uint32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->source<TensorSource<uint8_t>>(index, shape,
+ reinterpret_cast<const uint8_t *>(buffer), length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asFeatureSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::feature::Shape &shape, const void *buffer,
+ size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->source<FeatureSource<float>>(index, shape, reinterpret_cast<const float *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->source<FeatureSource<int32_t>>(index, shape,
+ reinterpret_cast<const int32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->source<FeatureSource<uint32_t>>(
+ index, shape, reinterpret_cast<const uint32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->source<FeatureSource<uint8_t>>(index, shape,
+ reinterpret_cast<const uint8_t *>(buffer), length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asVectorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ int32_t len, void *buffer, size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->sink<VectorSink<float>>(index, len, reinterpret_cast<float *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->sink<VectorSink<int32_t>>(index, len, reinterpret_cast<int32_t *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->sink<VectorSink<uint32_t>>(index, len, reinterpret_cast<uint32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->sink<VectorSink<uint8_t>>(index, len, reinterpret_cast<uint8_t *>(buffer), length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asMatrixSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ int32_t H, int32_t W, void *buffer, size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->sink<MatrixSink<float>>(index, H, W, reinterpret_cast<float *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->sink<MatrixSink<int32_t>>(index, H, W, reinterpret_cast<int32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->sink<MatrixSink<uint32_t>>(index, H, W, reinterpret_cast<uint32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->sink<MatrixSink<uint8_t>>(index, H, W, reinterpret_cast<uint8_t *>(buffer),
+ length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::feature::Shape &shape, void *buffer, size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->sink<FeatureSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->sink<FeatureSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->sink<FeatureSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->sink<FeatureSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
+ length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length)
+{
+ assert(shape.rank() == 3);
+
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->sink<Tensor3DSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->sink<Tensor3DSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->sink<Tensor3DSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->sink<Tensor3DSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
+ length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+static void asTensorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index,
+ const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ execution->sink<TensorSink<float>>(index, shape, reinterpret_cast<float *>(buffer), length);
+ break;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ execution->sink<TensorSink<int32_t>>(index, shape, reinterpret_cast<int32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_UINT32:
+ execution->sink<TensorSink<uint32_t>>(index, shape, reinterpret_cast<uint32_t *>(buffer),
+ length);
+ break;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ execution->sink<TensorSink<uint8_t>>(index, shape, reinterpret_cast<uint8_t *>(buffer),
+ length);
+ break;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
+
+//
+// NNAPI Implementation
+//
+int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
+ ANeuralNetworksExecution **execution)
+{
+ if ((compilation == nullptr) || (execution == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ std::shared_ptr<const ::internal::arm_compute::Plan> plan;
+ compilation->publish(plan);
+ ANeuralNetworksExecution *execution_ptr = new ANeuralNetworksExecution{plan};
+ if (execution_ptr == nullptr)
+ {
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+ *execution = execution_ptr;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, const void *buffer,
+ size_t length)
+{
+ // Don't check type
+ // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
+ // If the input or output is optional and omitted then it need not have a fully specified tensor
+ // operand type
+ if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const auto &operands = execution->plan().model().operands();
+
+ // TODO Check type conflicts
+
+ // NOTE The current implementation assumes that every input is a feature map.
+ // TODO Remove this assumption
+ const auto operand_index = execution->plan().model().inputs.at(index);
+ int32_t input_type = operands.at(operand_index).type();
+ // NOTE TFLite passes the type parameter unconditionally as nullptr.
+ // Is it necessary to re-read a type value that was already set at model-build time?
+ if (type != nullptr)
+ {
+ input_type = type->type;
+ }
+
+ auto shape = operands.at(operand_index).shape();
+ auto rank = shape.rank();
+
+ if (rank == 1)
+ {
+ const auto len = shape.dim(0);
+
+ asVectorSource(execution, input_type, index, len, buffer, length);
+ }
+ else if (rank == 2)
+ {
+ const auto &operand_shape = shape.asMatrix();
+
+ asMatrixSource(execution, input_type, index, operand_shape, buffer, length);
+ }
+ else if (rank == 3)
+ {
+ const auto &operand_shape = shape.asTensor();
+
+ asTensor3DSource(execution, input_type, index, operand_shape, buffer, length);
+ }
+ else if (rank == 4)
+ {
+ const auto &operand_shape = shape.asFeature();
+
+ asFeatureSource(execution, input_type, index, operand_shape, buffer, length);
+ }
+ else
+ {
+ // NOTE TensorSource is much slower than specialized Source(s)
+ const auto &operand_shape = shape.asTensor();
+
+ asTensorSource(execution, input_type, index, operand_shape, buffer, length);
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
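+
+// NOTE (illustrative usage): for a rank-4 float input this dispatches to
+// asFeatureSource; the dimensions below are hypothetical:
+//
+//   float input[1 * 3 * 224 * 224];
+//   ANeuralNetworksExecution_setInput(execution, 0, nullptr, input, sizeof(input));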
+
+// squeeze(shape) eliminates all the dimensions whose dimensionality is 1
+// For example, squeeze([3, 1, 3]) returns [3, 3]
+static nnfw::misc::tensor::Shape squeeze(const nnfw::misc::tensor::Shape &shape)
+{
+ nnfw::misc::tensor::Shape res(0);
+
+ for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+ {
+ if (shape.dim(axis) != 1)
+ {
+ res.append(shape.dim(axis));
+ }
+ }
+
+ return res;
+}
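+
+// Illustrative usage of squeeze (values hypothetical):
+//
+//   nnfw::misc::tensor::Shape s(0);
+//   s.append(3); s.append(1); s.append(3);
+//   const auto r = squeeze(s); // r.rank() == 2, dims are [3, 3]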
+
+int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type, void *buffer,
+ size_t length)
+{
+ // Don't check type
+ // Comment about ANeuralNetworksOperandType in NeuralNetworks.h:
+ // If the input or output is optional and omitted then it need not have a fully specified tensor
+ // operand type
+ if ((execution == nullptr) || ((buffer == nullptr) && (length != 0)))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const auto &operands = execution->plan().model().operands();
+
+ // TODO Check type conflicts
+
+ const auto operand_index = execution->plan().model().outputs.at(index);
+ int32_t output_type = operands.at(operand_index).type();
+ const auto &output_shape = operands.at(operand_index).shape();
+
+ if (output_shape.rank() == 1)
+ {
+ const auto len = output_shape.dim(0);
+
+ asVectorSink(execution, output_type, index, len, buffer, length);
+ }
+ else if (output_shape.rank() == 2)
+ {
+ const auto H = output_shape.dim(0);
+ const auto W = output_shape.dim(1);
+
+ asMatrixSink(execution, output_type, index, H, W, buffer, length);
+ }
+ else if (output_shape.rank() == 3)
+ {
+ asTensor3DSink(execution, output_type, index, output_shape, buffer, length);
+ }
+ else if (output_shape.rank() == 4)
+ {
+ const auto &operand_shape = operands.at(operand_index).shape().asFeature();
+
+ asFeatureSink(execution, output_type, index, operand_shape, buffer, length);
+ }
+ else
+ {
+ // NOTE TensorSink is much slower than specialized Sink(s)
+ const auto &shape = operands.at(operand_index).shape();
+ asTensorSink(execution, output_type, index, shape, buffer, length);
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
+ ANeuralNetworksEvent **event)
+{
+ if ((execution == nullptr) || (event == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ // TODO: Handle event
+ ANeuralNetworksEvent *event_ptr = new ANeuralNetworksEvent{};
+ if (event_ptr == nullptr)
+ {
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+ *event = event_ptr;
+
+ return ANeuralNetworksExecution_compute(execution);
+}
+
+int ANeuralNetworksExecution_compute(ANeuralNetworksExecution *execution)
+{
+ if (execution == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const bool sync = profiling::Context::get().sync();
+ const auto &plan = execution->plan();
+ const auto &model = plan.model();
+
+ // Set input(s)
+ for (uint32_t n = 0; n < model.inputs.size(); ++n)
+ {
+ auto setter = [&](::arm_compute::ITensor &tensor) { execution->source(n).push(tensor); };
+
+ // Some operands may not be defined in the plan, because they are useless to ACL
+ // (e.g., the shape tensor of the Reshape operator). So, sanity-check existence first.
+ if (plan.operands().exist(model.inputs.at(n)))
+ {
+ plan.operands().at(model.inputs.at(n)).access(setter);
+ }
+ }
+
+ const auto &operations = execution->plan().operations();
+
+ for (uint32_t n = 0; n < operations.size(); ++n)
+ {
+ auto prof = profiling::Context::get().getProfiler();
+ SCOPED_OPERATOR_PROFILE(prof, operations.at(n).op_idx());
+ operations.at(n).run();
+
+ if (sync)
+ {
+ arm_compute::CLScheduler::get().sync();
+ }
+ }
+
+ // Get output(s)
+ for (uint32_t n = 0; n < model.outputs.size(); ++n)
+ {
+ auto getter = [&](::arm_compute::ITensor &tensor) { execution->sink(n).pull(tensor); };
+
+ plan.operands().at(model.outputs.at(n)).access(getter);
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
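+
+// NOTE (illustrative usage): compute() runs synchronously, so the event
+// returned by startCompute is already complete by the time it is waited on:
+//
+//   ANeuralNetworksEvent *event = nullptr;
+//   if (ANeuralNetworksExecution_startCompute(execution, &event) == ANEURALNETWORKS_NO_ERROR)
+//   {
+//     ANeuralNetworksEvent_wait(event); // returns immediately in this runtime
+//     ANeuralNetworksEvent_free(event);
+//   }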
+
+void ANeuralNetworksExecution_free(ANeuralNetworksExecution *execution) { delete execution; }
+
+// TODO Implement this; it was added only to fix a link error in the test build.
+int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length)
+{
+ if ((execution == nullptr) || (memory == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ assert(false);
+ return -1;
+}
+
+// TODO Implement this; it was added only to fix a link error in the test build.
+int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution *execution, int32_t index,
+ const ANeuralNetworksOperandType *type,
+ const ANeuralNetworksMemory *memory, size_t offset,
+ size_t length)
+{
+ if ((execution == nullptr) || (memory == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ assert(false);
+ return -1;
+}
+
+int ANeuralNetworksExecution_getOutputOperandRank(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *rank)
+{
+ if ((execution == nullptr) || (rank == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const auto &operands = execution->plan().model().operands();
+ const auto operand_index = execution->plan().model().outputs.at(index);
+ const auto &output_shape = operands.at(operand_index).shape();
+
+ *rank = output_shape.rank();
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksExecution_getOutputOperandDimensions(ANeuralNetworksExecution *execution,
+ int32_t index, uint32_t *dimensions)
+{
+ if ((execution == nullptr) || (dimensions == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const auto &operands = execution->plan().model().operands();
+ const auto operand_index = execution->plan().model().outputs.at(index);
+ const auto &output_shape = operands.at(operand_index).shape();
+
+ for (uint32_t axis = 0; axis < output_shape.rank(); ++axis)
+ {
+ dimensions[axis] = static_cast<uint32_t>(output_shape.dim(axis));
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
diff --git a/runtime/contrib/pure_arm_compute/src/execution.h b/runtime/contrib/pure_arm_compute/src/execution.h
new file mode 100644
index 000000000..f55ab3fbf
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/execution.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file execution.h
+ * @brief This file contains ANeuralNetworksExecution class for handling Execution NNAPI such as
+ * ANeuralNetworksExecution_create, ANeuralNetworksExecution_setInput
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __EXECUTION_H__
+#define __EXECUTION_H__
+
+#include "internal/arm_compute.h"
+#include "internal/Sink.h"
+#include "internal/Source.h"
+
+/**
+ * @brief struct to express Execution of NNAPI
+ */
+struct ANeuralNetworksExecution
+{
+public:
+ /**
+ * @brief Construct with params
+ * @param [in] plan Pointer to get internal::arm_compute::Plan
+ */
+ ANeuralNetworksExecution(const std::shared_ptr<const internal::arm_compute::Plan> &plan)
+ : _plan{plan}
+ {
+ _sources.resize(_plan->model().inputs.size());
+ _sinks.resize(_plan->model().outputs.size());
+ }
+
+public:
+ /**
+ * @brief Get reference of internal::arm_compute::Plan
+ * @return Const reference of internal::arm_compute::Plan
+ */
+ const internal::arm_compute::Plan &plan(void) const { return *_plan; }
+
+private:
+ std::shared_ptr<const internal::arm_compute::Plan> _plan;
+
+public:
+ /**
+ * @brief Set the nth source with param
+ * @param [in] n Index of the nth source
+ * @param [in] source Pointer to set the nth source from
+ * @return N/A
+ */
+ // TODO Use InputIndex instead of int
+ void source(int n, std::unique_ptr<Source> &&source) { _sources.at(n) = std::move(source); }
+ /**
+ * @brief Set the nth source with param
+ * @param [in] n Index of the nth source
+ * @param [in] args Arguments to set the nth source from
+ * @return N/A
+ */
+ template <typename T, typename... Args> void source(int n, Args &&... args)
+ {
+ source(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}});
+ }
+
+public:
+ /**
+ * @brief Get the nth source
+ * @param [in] n Index of the nth source
+ * @return Const reference of Source
+ */
+ const Source &source(int n) const { return *(_sources.at(n)); }
+
+public:
+ /**
+ * @brief Set the nth sink with param
+ * @param [in] n Index of the nth sink
+ * @param [in] sink Pointer to set the nth sink from
+ * @return N/A
+ */
+ // TODO Use OutputIndex instead of int
+ void sink(int n, std::unique_ptr<Sink> &&sink) { _sinks.at(n) = std::move(sink); }
+ /**
+ * @brief Set the nth sink with param
+ * @param [in] n Index of the nth sink
+ * @param [in] args Arguments to set the nth sink from
+ * @return N/A
+ */
+ template <typename T, typename... Args> void sink(int n, Args &&... args)
+ {
+ sink(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}});
+ }
+
+public:
+ /**
+ * @brief Get the nth sink
+ * @param [in] n Index of the nth sink
+ * @return Const reference of Sink
+ */
+ const Sink &sink(int n) const { return *(_sinks.at(n)); }
+
+private:
+ std::vector<std::unique_ptr<Source>> _sources;
+ std::vector<std::unique_ptr<Sink>> _sinks;
+};
+
+#endif
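+
+// NOTE (illustrative usage): the templated setters construct a Source/Sink in
+// place; lengths and buffers below are hypothetical:
+//
+//   execution->source<VectorSource<float>>(0 /* input #0 */, len, in_buf, in_bytes);
+//   execution->sink<VectorSink<float>>(0 /* output #0 */, len, out_buf, out_bytes);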
diff --git a/runtime/contrib/pure_arm_compute/src/internal/FeatureSink.h b/runtime/contrib/pure_arm_compute/src/internal/FeatureSink.h
new file mode 100644
index 000000000..7c6884141
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/FeatureSink.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        FeatureSink.h
+ * @brief       This file contains FeatureSink class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_FEATURE_SINK_H__
+#define __INTERNAL_FEATURE_SINK_H__
+
+#include "internal/Sink.h"
+#include "internal/nnapi/feature/View.h"
+#include "internal/arm_compute/feature/View.h"
+
+#include <misc/feature/Shape.h>
+#include "misc/feature/IndexIterator.h"
+
+/**
+ * @brief Class to store Feature(4D) output data.
+ * This is for pulling data out of an ACL tensor into a user-provided buffer.
+ * @tparam T Type of the data elements
+ */
+template <typename T> class FeatureSink final : public Sink
+{
+public:
+ /**
+ * @brief Construct a FeatureSink object
+ *
+ * @param[in] shape 4D tensor dimensions for this feature
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ FeatureSink(const nnfw::misc::feature::Shape &shape, T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Pull the data into the internal structure
+ * @param[in] tensor The tensor which contains source data
+ * @return N/A
+ */
+ void pull(::arm_compute::ITensor &tensor) const override
+ {
+ const ::internal::arm_compute::feature::View<T> from{&tensor};
+ // TODO Remove this cast; it is unavoidable for now.
+ ::internal::nnapi::feature::View<T> into{_shape, _base, _size};
+
+ ::nnfw::misc::feature::iterate(_shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+
+private:
+ const nnfw::misc::feature::Shape _shape;
+ T *const _base;
+ const size_t _size;
+};
+
+#endif // __INTERNAL_FEATURE_SINK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/FeatureSource.h b/runtime/contrib/pure_arm_compute/src/internal/FeatureSource.h
new file mode 100644
index 000000000..772beb701
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/FeatureSource.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        FeatureSource.h
+ * @brief       This file contains FeatureSource class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_FEATURE_SOURCE_H__
+#define __INTERNAL_FEATURE_SOURCE_H__
+
+#include <misc/feature/Shape.h>
+#include <misc/feature/IndexIterator.h>
+
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/arm_compute/feature/View.h"
+
+/**
+ * @brief Class to store feature(4D) input data.
+ * This is for pushing the data out to another tensor.
+ * @tparam T Type of the data elements
+ */
+template <typename T> class FeatureSource final : public Source
+{
+public:
+ /**
+ * @brief Construct a FeatureSource object
+ *
+ * @param[in] shape 4D tensor dimensions for this feature
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ FeatureSource(const nnfw::misc::feature::Shape &shape, const T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Push the data out to another tensor
+ * @param[out] tensor The tensor where the output data will be stored
+ * @return N/A
+ */
+ void push(::arm_compute::ITensor &tensor) const override
+ {
+ const ::internal::nnapi::feature::Reader<T> from{_shape, _base, _size};
+ ::internal::arm_compute::feature::View<T> into{&tensor};
+
+ ::nnfw::misc::feature::iterate(_shape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+
+private:
+ const nnfw::misc::feature::Shape _shape;
+ const T *const _base;
+ const size_t _size;
+};
+
+#endif // __INTERNAL_FEATURE_SOURCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/IExecutionBuilder.h b/runtime/contrib/pure_arm_compute/src/internal/IExecutionBuilder.h
new file mode 100644
index 000000000..2a6e2a743
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/IExecutionBuilder.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file IExecutionBuilder.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines interface of ExecutionBuilder
+ */
+#ifndef __INTERNAL_IEXECUTION_BUILDER_H__
+#define __INTERNAL_IEXECUTION_BUILDER_H__
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include <memory>
+#include <string>
+
+/**
+ * @brief Struct to define interface of ExecutionBuilder
+ */
+struct IExecutionBuilder
+{
+ /**
+ * @brief Destroy the IExecutionBuilder object
+ */
+ virtual ~IExecutionBuilder() = default;
+
+ /**
+ * @brief Append function to execute
+ * @param[in] name Name of function
+ * @param[in] f Function to append
+ * @return N/A
+ */
+ virtual void append(const std::string &name, std::unique_ptr<::arm_compute::IFunction> &&f) = 0;
+};
+
+#endif // __INTERNAL_IEXECUTION_BUILDER_H__
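
For orientation, a hedged sketch of what a concrete builder can look like (the runtime's real ExecutionBuilder lives elsewhere in this patch series; SimpleExecutionBuilder and its run() are illustrative only):

    #include "internal/IExecutionBuilder.h"

    #include <vector>

    class SimpleExecutionBuilder final : public IExecutionBuilder
    {
    public:
      void append(const std::string &name, std::unique_ptr<::arm_compute::IFunction> &&f) override
      {
        (void)name; // a real builder would keep the name for logging/profiling
        _functions.emplace_back(std::move(f));
      }

      // Run the recorded functions in submission order
      void run(void)
      {
        for (auto &f : _functions)
          f->run();
      }

    private:
      std::vector<std::unique_ptr<::arm_compute::IFunction>> _functions;
    };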
diff --git a/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h b/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h
new file mode 100644
index 000000000..23ecc112b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/MatrixSink.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MatrixSink.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines MatrixSink class
+ */
+#ifndef __INTERNAL_MATRIX_SINK_H__
+#define __INTERNAL_MATRIX_SINK_H__
+
+#include "internal/Sink.h"
+
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/core/Window.h>
+#include <arm_compute/core/Helpers.h>
+
+#include <cstdint>
+#include <cstring>
+#include <cassert>
+
+/**
+ * @brief Class to get matrix data from arm compute tensor
+ */
+template <typename T> class MatrixSink final : public Sink
+{
+public:
+ /**
+ * @brief Construct a new Matrix Sink object
+ * @param[in] H Height of matrix
+ * @param[in] W Width of matrix
+ * @param[in] base Pointer to get data
+ * @param[in] size Size of matrix
+ */
+ MatrixSink(const int32_t H, const int32_t W, T *base, const size_t size)
+ : _height{H}, _width{W}, _base{base}
+ {
+ assert(size >= _height * _width * sizeof(T));
+ }
+
+public:
+ /**
+ * @brief Get matrix data from arm compute tensor to base
+ * @param[in] tensor Tensor object of arm compute to get data
+ * @return N/A
+ */
+ void pull(::arm_compute::ITensor &tensor) const override
+ {
+ assert(tensor.info()->dimension(0) == _width);
+ assert(tensor.info()->dimension(1) == _height);
+
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Coordinates;
+ using ::arm_compute::execute_window_loop;
+
+ Window window;
+
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
+
+ Iterator it(&tensor, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ const auto row = id.y();
+ memcpy(_base + row * _width, it.ptr(), _width * sizeof(T));
+ },
+ it);
+ }
+
+private:
+ const int32_t _height;
+ const int32_t _width;
+
+private:
+ T *const _base;
+};
+
+#endif // __INTERNAL_MATRIX_SINK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h b/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h
new file mode 100644
index 000000000..71d6a804f
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/MatrixSource.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MatrixSource.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines MatrixSource class
+ */
+#ifndef __INTERNAL_MATRIX_SOURCE_H__
+#define __INTERNAL_MATRIX_SOURCE_H__
+
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/core/Window.h>
+#include <arm_compute/core/Helpers.h>
+
+#include "internal/Source.h"
+
+/**
+ * @brief Class to push matrix data to arm compute tensor
+ */
+template <typename T> class MatrixSource final : public Source
+{
+public:
+ /**
+ * @brief Construct a new MatrixSource object
+ * @param[in] shape Shape of matrix
+ * @param[in] base Pointer of matrix data to push
+ * @param[in] size Size of matrix
+ */
+ MatrixSource(const nnfw::misc::matrix::Shape &shape, const T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Push matrix data to arm compute tensor
+ * @param[out] tensor Tensor object of arm compute to push matrix data
+ * @return N/A
+ */
+ void push(::arm_compute::ITensor &tensor) const override
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Coordinates;
+ using ::arm_compute::execute_window_loop;
+
+ Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
+
+ int32_t width = _shape.W;
+
+ Iterator it(&tensor, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ const auto height = id.y();
+ memcpy(it.ptr(), _base + height * width, width * sizeof(T));
+ },
+ it);
+ }
+
+private:
+ const nnfw::misc::matrix::Shape _shape;
+ const T *const _base;
+ const size_t _size;
+};
+
+#endif // __INTERNAL_MATRIX_SOURCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Model.cc b/runtime/contrib/pure_arm_compute/src/internal/Model.cc
new file mode 100644
index 000000000..03753fea2
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Model.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/Model.h"
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+Shape::Shape(uint32_t rank) : nnfw::misc::tensor::Shape(rank)
+{
+ // DO NOTHING
+}
+
+int32_t Shape::asVector(void) const
+{
+ assert(rank() == 1);
+
+ return dim(0);
+}
+
+nnfw::misc::matrix::Shape Shape::asMatrix(void) const
+{
+ assert(rank() == 2);
+
+ const auto height = dim(0);
+ const auto width = dim(1);
+
+ return nnfw::misc::matrix::Shape(height, width);
+}
+
+nnfw::misc::feature::Shape Shape::asFeature(void) const
+{
+ assert(rank() == 4);
+
+ // Feature Map in NNAPI
+ // - Dimension(0) -> Batch
+ // - Dimension(1) -> Height
+ // - Dimension(2) -> Width
+ // - Dimension(3) -> Depth
+
+ const auto batch = dim(0);
+ const auto depth = dim(3);
+ const auto height = dim(1);
+ const auto width = dim(2);
+
+ return nnfw::misc::feature::Shape(batch, depth, height, width);
+}
+
+nnfw::misc::tensor::Shape Shape::asTensor(void) const
+{
+ return nnfw::misc::tensor::Shape(*this); // this shape already follows the NNAPI layout
+}
+
+nnfw::misc::kernel::Shape Shape::asKernel(void) const
+{
+ assert(rank() == 4);
+
+ // Convolution Kernel in NNAPI
+ // - Dimension(0) -> Count
+ // - Dimension(1) -> Height
+ // - Dimension(2) -> Width
+ // - Dimension(3) -> Depth
+ const auto count = dim(0);
+ const auto depth = dim(3);
+ const auto height = dim(1);
+ const auto width = dim(2);
+
+ return nnfw::misc::kernel::Shape(count, depth, height, width);
+}
+
+// Extended dimensions are filled with 1.
+void Shape::extendRank(size_t to_rank)
+{
+ for (int i = rank() + 1; i <= to_rank; ++i)
+ {
+ prepend(1);
+ }
+}
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+Index Set::append(const Shape &shape, int32_t type, float scale, int32_t zeroPoint)
+{
+ int32_t index = _objects.size();
+
+ _objects.emplace_back(new Object{shape, type, scale, zeroPoint});
+
+ return Index{index};
+}
+
+const Object &Set::at(const Index &index) const { return *(_objects.at(index.asInt())); }
+
+Object &Set::at(const Index &index) { return *(_objects.at(index.asInt())); }
+
+bool Set::exist(const Index &index) const
+{
+ return index.asInt() >= 0 && index.asInt() < _objects.size();
+}
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
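
To make the NHWC-to-feature mapping above concrete, a small self-check sketch (an assumption-laden illustration: it relies on the writable nnfw::misc::tensor::Shape::dim() accessor and the N/C/H/W members of feature::Shape that nnfw::misc provides elsewhere in this repository; the Shape class is declared in internal/Model.h, shown next):

    #include "internal/Model.h"

    #include <cassert>

    void feature_mapping_check(void)
    {
      internal::tflite::operand::Shape shape{4}; // NNAPI operand order: N, H, W, C
      shape.dim(0) = 1;   // batch
      shape.dim(1) = 224; // height
      shape.dim(2) = 224; // width
      shape.dim(3) = 3;   // depth

      const auto feature = shape.asFeature();
      assert(feature.N == 1 && feature.C == 3 && feature.H == 224 && feature.W == 224);
    }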
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Model.h b/runtime/contrib/pure_arm_compute/src/internal/Model.h
new file mode 100644
index 000000000..bdcf32f6f
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Model.h
@@ -0,0 +1,538 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Model.h
+ * @brief This file contains classes for handling the internal Model object
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_MODEL_H__
+#define __INTERNAL_MODEL_H__
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+/**
+ * @brief Class to express index of operand.
+ */
+class Index
+{
+public:
+ /**
+ * @brief Construct a new Index object for operand with param.
+ * @param [in] value The number of index
+ */
+ explicit Index(int value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get index value as int
+ * @return Index value as int
+ */
+ int asInt(void) const { return _value; }
+
+private:
+ int _value;
+};
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
+
+#include <vector>
+#include <cstdint>
+
+#include "misc/feature/Shape.h"
+#include "misc/matrix/Shape.h"
+#include "misc/kernel/Shape.h"
+#include "misc/tensor/Shape.h"
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+/**
+ * @brief Class to express shape of operand.
+ */
+struct Shape : public nnfw::misc::tensor::Shape
+{
+public:
+ /**
+ * @brief Construct a new Shape object for operand with param.
+ * @param [in] rank The rank value of shape
+ */
+ Shape(uint32_t rank);
+
+public:
+ /**
+ * @brief Get dimension value of tensor as vector
+ * @return Dimension value(int32_t) of tensor as vector
+ */
+ int32_t asVector(void) const;
+ /**
+ * @brief Get dimension values of tensor as feature::Shape
+ * @return Dimension values of tensor as feature::Shape
+ */
+ nnfw::misc::feature::Shape asFeature(void) const;
+ /**
+ * @brief Get dimension values of tensor as matrix::Shape
+ * @return Dimension values of tensor as matrix::Shape
+ */
+ nnfw::misc::matrix::Shape asMatrix(void) const;
+ /**
+ * @brief Get dimension values of tensor as kernel::Shape
+ * @return Dimension values of tensor as kernel::Shape
+ */
+ nnfw::misc::kernel::Shape asKernel(void) const;
+ /**
+ * @brief Get dimension values of tensor::Shape
+ * @return Dimension values of tensor::Shape
+ */
+ nnfw::misc::tensor::Shape asTensor(void) const;
+
+public:
+ /**
+ * @brief Extend rank of Shape object for operand with param.
+ * @param [in] to_rank The rank value to be extended to
+ * @return N/A
+ */
+ void extendRank(size_t);
+};
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
+
+#include <algorithm>
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+/**
+ * @brief Class to have data of operand.
+ */
+struct Data
+{
+ /**
+ * @brief Destruct this object
+ */
+ virtual ~Data() = default;
+
+ /**
+ * @brief Get size of data
+ * @return size of data
+ */
+ virtual size_t size(void) const = 0;
+ /**
+ * @brief Get the base address of data
+ * @return the base address of data
+ */
+ virtual const uint8_t *base(void) const = 0;
+};
+
+/**
+ * @brief Class to have cached data of operand.
+ */
+class CachedData final : public Data
+{
+public:
+ /**
+ * @brief Construct a new CachedData object for operand with param.
+ * @param [in] base the base address of data
+ * @param [in] size the size of data
+ */
+ CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size}
+ {
+ std::copy(base, base + size, _base);
+ }
+
+public:
+ /**
+ * @brief Destruct this object
+ */
+ ~CachedData() { delete[] _base; }
+
+public:
+ /**
+ * @brief Get size of data
+ * @return size of data
+ */
+ size_t size(void) const override { return _size; }
+ /**
+ * @brief Get the base address of data
+ * @return the base address of data
+ */
+ const uint8_t *base(void) const override { return _base; }
+
+private:
+ uint8_t *_base;
+ size_t _size;
+};
+
+/**
+ * @brief Class to have external data of operand.
+ */
+class ExternalData final : public Data
+{
+public:
+ /**
+ * @brief Construct a new ExternalData object for operand with param.
+ * @param [in] base the base address of data
+ * @param [in] size the size of data
+ */
+ ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get size of data
+ * @return size of data
+ */
+ size_t size(void) const override { return _size; }
+ /**
+ * @brief Get the base address of data
+ * @return the base address of data
+ */
+ const uint8_t *base(void) const override { return _base; }
+
+private:
+ const uint8_t *_base;
+ const size_t _size;
+};
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
+
+#include <memory>
+#include <cassert>
+#include <functional>
+#include "internal/Swizzle.h"
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+/**
+ * @brief Class to express operand as object.
+ */
+class Object
+{
+public:
+ /**
+ * @brief Construct a new Object object for operand with param.
+ * @param [in] shape shape of operand
+ * @param [in] type type of operand
+ * @param [in] scale scale of operand
+ * @param [in] zeroPoint zeroPoint of operand
+ */
+ explicit Object(const Shape &shape, const int32_t type, const float scale,
+ const int32_t zeroPoint)
+ : _shape{shape}, _type{type}, _scale{scale}, _zeroPoint{zeroPoint}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get shape of operand
+ * @return Reference of shape of operand
+ */
+ const Shape &shape(void) const { return _shape; }
+ /**
+ * @brief Get type of operand
+ * @return type of operand
+ */
+ const int32_t type(void) const { return _type; }
+ /**
+ * @brief Get scale of operand
+ * @return scale of operand
+ */
+ const float scale(void) const { return _scale; }
+ /**
+ * @brief Get zeroPoint of operand
+ * @return zeroPoint of operand
+ */
+ const int32_t zeroPoint(void) const { return _zeroPoint; }
+
+private:
+ void data(std::unique_ptr<Data> &&data) { _data = std::move(data); }
+
+public:
+ /**
+ * @brief Get data of operand
+ * @return Reference of data of operand
+ */
+ const Data &data(void) const { return *_data; }
+ /**
+ * @brief Check whether this Object has data
+ * @return @c true if Object has data, otherwise @c false
+ */
+ bool hasData(void) const { return _data != nullptr; }
+
+public:
+ /**
+ * @brief Set data of operand with param
+ * @param [in] args arguments of data to be set
+ * @return N/A
+ */
+ template <typename T, typename... Args> void data(Args &&... args)
+ {
+ data(std::unique_ptr<T>(new T{std::forward<Args>(args)...}));
+ }
+
+public:
+ /**
+ * @brief Get value of data as scalar
+ * @return value of data as scalar
+ */
+ template <typename T> T asScalar(void) const
+ {
+ assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1)));
+ assert(_data != nullptr);
+ assert((_data->base() != nullptr) && (_data->size() == sizeof(T)));
+
+ return *(reinterpret_cast<const T *>(_data->base()));
+ }
+
+public:
+ /**
+ * @brief Get the data value with its bits reordered via ReorderBits
+ * @param [in] numOfBits The number of bits to be reordered
+ * @return The reordered value
+ */
+ template <typename T> T asReorderBits(size_t numOfBits) const
+ {
+ assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1)));
+ assert(_data != nullptr);
+ assert((_data->base() != nullptr) && (_data->size() == sizeof(T)));
+
+ return ReorderBits<T>(asScalar<T>(), numOfBits);
+ }
+
+private:
+ const Shape _shape;
+ const int32_t _type;
+ const float _scale;
+ const int32_t _zeroPoint;
+ std::unique_ptr<Data> _data;
+};
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace operand
+{
+
+/**
+ * @brief Class to hold Object instances as a set
+ */
+class Set
+{
+public:
+ /**
+ * @brief Iterate objects with fn
+ * @param [in] fn function to be iterated
+ * @return N/A
+ */
+ void iterate(const std::function<void(const Index &)> &fn)
+ {
+ for (uint32_t n = 0; n < _objects.size(); ++n)
+ {
+ const Index operand_index{static_cast<int>(n)};
+ fn(operand_index);
+ }
+ }
+
+public:
+ /**
+ * @brief Append Object for operand with param
+ * @param [in] shape shape of operand
+ * @param [in] type type of operand
+ * @param [in] scale scale of operand
+ * @param [in] zeroPoint zeroPoint of operand
+ * @return Index of the newly appended Object
+ */
+ Index append(const Shape &, int32_t type, float scale, int32_t zeroPoint);
+
+public:
+ /**
+ * @brief Get Object at Index
+ * @param [in] index Index to be at
+ * @return Const reference of Object
+ */
+ const Object &at(const Index &) const;
+ /**
+ * @brief Get Object at Index
+ * @param [in] index Index to be at
+ * @return Reference of Object
+ */
+ Object &at(const Index &);
+ /**
+ * @brief Get size of operands in Set
+ * @return Value of size
+ */
+ size_t size(void) const { return _objects.size(); }
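+ /**
+  * @brief Check whether the Object at the given Index exists
+  * @param [in] index Index to find
+  * @return @c true if the Object exists, otherwise @c false
+  */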
+ bool exist(const Index &) const;
+
+private:
+ std::vector<std::unique_ptr<Object>> _objects;
+};
+
+} // namespace operand
+} // namespace tflite
+} // namespace internal
+
+#include "internal/op/NodeVisitor.h"
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+
+/**
+ * @brief Class to hold a sequence of operators.
+ */
+class Sequence
+{
+public:
+ /**
+ * @brief Construct a new Sequence object for operator as default
+ */
+ Sequence() = default;
+
+public:
+ /**
+ * @brief Get size of operators in Sequence
+ * @return Value of size
+ */
+ uint32_t size(void) const { return _ops.size(); }
+
+public:
+ /**
+ * @brief Get op::Node at Index
+ * @param [in] nth index to be at
+ * @return Reference of op::Node
+ */
+ op::Node &at(uint32_t nth) { return *(_ops.at(nth)); }
+ /**
+ * @brief Get op::Node at Index
+ * @param [in] nth index to be at
+ * @return Const reference of op::Node
+ */
+ const op::Node &at(uint32_t nth) const { return *(_ops.at(nth)); }
+
+private:
+ Sequence &emplace_back(std::unique_ptr<op::Node> &&node)
+ {
+ _ops.emplace_back(std::move(node));
+ return (*this);
+ }
+
+public:
+ /**
+ * @brief Add op::Node with param
+ * @param [in] args arguments of op::Node to be set
+ * @return Reference of Sequence
+ */
+ template <typename T, typename... Args> Sequence &emplace_back(Args &&... args)
+ {
+ return emplace_back(std::unique_ptr<T>(new T{std::forward<Args>(args)...}));
+ }
+
+private:
+ std::vector<std::unique_ptr<op::Node>> _ops;
+};
+
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+
+/**
+ * @brief Class to have operand::Set as operands and op::Sequence as operators
+ */
+class Model
+{
+public:
+ /**
+ * @brief Get operand::Set
+ * @return Reference of operand::Set
+ */
+ operand::Set &operands(void) { return _operands; }
+ /**
+ * @brief Get operand::Set
+ * @return Const reference of operand::Set
+ */
+ const operand::Set &operands(void) const { return _operands; }
+
+public:
+ /**
+ * @brief Get op::Sequence
+ * @return Reference of op::Sequence
+ */
+ op::Sequence &operations(void) { return _operations; }
+ /**
+ * @brief Get op::Sequence
+ * @return Const reference of op::Sequence
+ */
+ const op::Sequence &operations(void) const { return _operations; }
+
+private:
+ operand::Set _operands;
+ op::Sequence _operations;
+
+public:
+ // TODO Hide these fields
+ std::vector<operand::Index> inputs; /**< indexes of operand as input */
+ std::vector<operand::Index> outputs; /**< indexes of operand as output */
+};
+
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_MODEL_H__
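
A minimal sketch of populating a Model through the classes above (the type code 0, scale, and value are illustrative; the writable Shape::dim() accessor is assumed, as elsewhere in nnfw; CachedData keeps its own copy of the bytes, so passing a local is safe):

    #include "internal/Model.h"

    #include <cassert>

    void build_scalar_operand(internal::tflite::Model &model)
    {
      internal::tflite::operand::Shape shape{1}; // rank-1, single element
      shape.dim(0) = 1;

      const auto index = model.operands().append(shape, /*type*/ 0, /*scale*/ 0.0f, /*zeroPoint*/ 0);

      // Attach constant data through the templated Object::data<T>() setter
      const int32_t value = 42;
      model.operands().at(index).data<internal::tflite::operand::CachedData>(
          reinterpret_cast<const uint8_t *>(&value), sizeof(value));

      // Scalars are read back through Object::asScalar<T>()
      assert(model.operands().at(index).asScalar<int32_t>() == 42);
    }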
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Sink.h b/runtime/contrib/pure_arm_compute/src/internal/Sink.h
new file mode 100644
index 000000000..6f44561ea
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Sink.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Sink.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Sink struct
+ */
+#ifndef __INTERNAL_SINK_H__
+#define __INTERNAL_SINK_H__
+
+#include <arm_compute/core/ITensor.h>
+
+/**
+ * @brief Struct to get tensor data from arm compute tensor (abstract)
+ */
+struct Sink
+{
+ /**
+ * @brief Destroy the Sink object
+ */
+ virtual ~Sink() = default;
+
+ /**
+ * @brief Get tensor data from arm compute tensor
+ * @param[in] tensor Tensor object of arm compute to get data
+ * @return N/A
+ */
+ virtual void pull(::arm_compute::ITensor &tensor) const = 0;
+};
+
+#endif // __INTERNAL_SINK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Sinks.h b/runtime/contrib/pure_arm_compute/src/internal/Sinks.h
new file mode 100644
index 000000000..7317c67c1
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Sinks.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        Sinks.h
+ * @brief       This file contains TensorSink class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_SINKS_H__
+#define __INTERNAL_SINKS_H__
+
+#include "internal/Sink.h"
+
+// TODO Extract TensorSink into TensorSink.h
+//
+// TensorSink
+//
+#include "internal/Swizzle.h"
+
+#include "internal/nnapi/tensor/View.h"
+#include "internal/arm_compute/tensor/View.h"
+
+#include "misc/tensor/IndexIterator.h"
+
+/**
+ * @brief Class to store NN model output data for general-shaped tensors.
+ * This is for pulling data from another tensor into the internal tensor.
+ * @tparam T Type of the data elements
+ */
+template <typename T> class TensorSink final : public Sink
+{
+public:
+ /**
+ * @brief Construct a TensorSink object
+ *
+ * @param[in] shape general-shaped tensor dimensions
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ TensorSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Pull the data into the internal structure
+ * @param[in] tensor The tensor which contains source data
+ * @return N/A
+ */
+ void pull(::arm_compute::ITensor &tensor) const override
+ {
+ const ::internal::arm_compute::tensor::View<T> from{&tensor};
+ ::internal::nnapi::tensor::View<T> into{_shape, _base, _size};
+
+ using ::nnfw::misc::tensor::iterate;
+ using ::nnfw::misc::tensor::Index;
+
+ const uint32_t rank = _shape.rank();
+
+ ::nnfw::misc::tensor::iterate(_shape) << [&](const Index &raw) {
+ Index permuted(raw.rank());
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ permuted.at(ToARMComputeAxis(rank, axis).value()) = raw.at(axis);
+ }
+
+ const auto value = from.at(permuted);
+ into.at(raw) = value;
+ };
+ }
+
+private:
+ const nnfw::misc::tensor::Shape _shape;
+
+private:
+ T *const _base;
+ const size_t _size;
+};
+
+#endif // __INTERNAL_SINKS_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Source.h b/runtime/contrib/pure_arm_compute/src/internal/Source.h
new file mode 100644
index 000000000..fa8f1e811
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Source.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Source.h
+ * @brief This file contains Source struct for pushing ITensor
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_SOURCE_H__
+#define __INTERNAL_SOURCE_H__
+
+#include <arm_compute/core/ITensor.h>
+
+/**
+ * @brief Struct to push inner source to ITensor.
+ */
+struct Source
+{
+ /**
+ * @brief Destructor as default
+ */
+ virtual ~Source() = default;
+
+ /**
+ * @brief Push inner source to ITensor
+ * @param [in] tensor ITensor to be pushed into
+ * @return N/A
+ */
+ virtual void push(::arm_compute::ITensor &tensor) const = 0;
+};
+
+#endif // __INTERNAL_SOURCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Swizzle.h b/runtime/contrib/pure_arm_compute/src/internal/Swizzle.h
new file mode 100644
index 000000000..f127b8a3b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Swizzle.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Swizzle.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines ARMComputeAxis class and utility functions to support mapping
+ * between arm compute axis and NNAPI axis
+ */
+#ifndef __SWIZZLE_H__
+#define __SWIZZLE_H__
+
+#include <cassert>
+#include <cstdint>
+
+/**
+ * @brief Class to represent arm compute axis
+ */
+class ARMComputeAxis
+{
+public:
+ /**
+ * @brief Construct a new ARMComputeAxis object
+ */
+ ARMComputeAxis() = default;
+
+public:
+ /**
+ * @brief Construct a new ARMComputeAxis object
+ * @param[in] value Raw axis number
+ */
+ explicit ARMComputeAxis(uint32_t value) : _value{value}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get raw axis number
+ * @return Raw axis number
+ */
+ uint32_t value(void) const { return _value; }
+
+private:
+ uint32_t _value;
+};
+
+/**
+ * @brief Convert T/F Lite / NNAPI axis (based on ...NHWC) to arm compute axis (WHCN...)
+ * @param[in] rank Rank of shape
+ * @param[in] axis Axis to map
+ * @return ARMComputeAxis including arm compute axis info
+ */
+inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis)
+{
+ assert(rank > axis);
+ const ARMComputeAxis reversed{(rank - axis) - 1};
+
+ if (rank < 4)
+ {
+ return reversed;
+ }
+
+ // DEPTH
+ if (0 == reversed.value())
+ {
+ return ARMComputeAxis{2};
+ }
+ // WIDTH
+ if (1 == reversed.value())
+ {
+ return ARMComputeAxis{0};
+ }
+ // HEIGHT
+ if (2 == reversed.value())
+ {
+ return ARMComputeAxis{1};
+ }
+
+ // ELSE
+ return reversed;
+}
+
+/**
+ * @brief Convert a bitmask from NNAPI axis order to arm compute axis order
+ * @param[in] in Bitmask data
+ * @param[in] numOfBits Used bits (rank)
+ * @return Converted bitmask
+ */
+template <typename T> inline T ReorderBits(T in, size_t numOfBits)
+{
+ assert(numOfBits > 0);
+ T out = 0;
+ for (int32_t i = numOfBits - 1; i >= 0; --i)
+ {
+ const uint32_t toShift = numOfBits - ToARMComputeAxis(numOfBits, i).value() - 1;
+ out += ((in & 1) << toShift);
+ in >>= 1;
+ }
+ return out;
+}
+
+#endif // __SWIZZLE_H__
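
The mapping is easiest to see with concrete values; a small self-check sketch (axis numbers follow directly from the ToARMComputeAxis() definition above):

    #include <cassert>

    void swizzle_check(void)
    {
      // rank 4: NNAPI order N,H,W,C (axes 0..3) maps onto the ACL order W,H,C,N
      assert(ToARMComputeAxis(4, 0).value() == 3); // batch  -> ACL axis 3
      assert(ToARMComputeAxis(4, 1).value() == 1); // height -> ACL axis 1
      assert(ToARMComputeAxis(4, 2).value() == 0); // width  -> ACL axis 0
      assert(ToARMComputeAxis(4, 3).value() == 2); // depth  -> ACL axis 2

      // For rank < 4 the axis order is simply reversed
      assert(ToARMComputeAxis(2, 0).value() == 1);
      assert(ToARMComputeAxis(2, 1).value() == 0);
    }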
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h
new file mode 100644
index 000000000..1e14e2d6c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSink.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Tensor3DSink.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Tensor3DSink class
+ */
+#ifndef __TENSOR3D_SINK_H__
+#define __TENSOR3D_SINK_H__
+
+#include "internal/Sink.h"
+
+//
+// This is a memcpy() version of the generic TensorSink for 3D tensors
+//
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/core/Window.h>
+#include <arm_compute/core/Helpers.h>
+
+/**
+ * @brief Class to get tensor data from arm compute tensor
+ */
+template <typename T> class Tensor3DSink final : public Sink
+{
+public:
+ /**
+ * @brief Construct a new Tensor3DSink object
+ * @param[in] shape Shape of tensor
+ * @param[in] base Pointer to get data
+ * @param[in] size Size of tensor
+ */
+ Tensor3DSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get tensor data from arm compute tensor to base
+ * @param[in] tensor Tensor object of arm compute to get data
+ * @return N/A
+ */
+ void pull(::arm_compute::ITensor &tensor) const override
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Coordinates;
+ using ::arm_compute::execute_window_loop;
+
+ Window window;
+
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
+ int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ int32_t width = _shape.dim(2);
+
+ Iterator it(&tensor, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ const auto z = id.z();
+ const auto y = id.y();
+ memcpy(_base + z * height_width + y * width, it.ptr(), width * sizeof(T));
+ },
+ it);
+ }
+
+private:
+ const nnfw::misc::tensor::Shape _shape;
+
+private:
+ T *const _base;
+ const size_t _size;
+};
+
+#endif // __TENSOR3D_SINK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h
new file mode 100644
index 000000000..3d8d1b958
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/Tensor3DSource.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Tensor3DSource.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Tensor3DSource class
+ */
+#ifndef __TENSOR3D_SOURCE_H__
+#define __TENSOR3D_SOURCE_H__
+
+#include "internal/Source.h"
+
+//
+// This is a memcpy() version of the generic TensorSource for 3D tensors
+//
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/core/Window.h>
+#include <arm_compute/core/Helpers.h>
+
+/**
+ * @brief Class to push tensor data to arm compute tensor
+ */
+template <typename T> class Tensor3DSource final : public Source
+{
+public:
+ /**
+ * @brief Construct a new Tensor3DSource object
+ * @param[in] shape Shape of tensor
+ * @param[in] base Pointer of tensor data to push
+ * @param[in] size Size of tensor
+ */
+ Tensor3DSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Push tensor data to arm compute tensor
+ * @param[out] tensor Tensor object of arm compute to push tensor data
+ * @return N/A
+ */
+ void push(::arm_compute::ITensor &tensor) const override
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+ using ::arm_compute::Coordinates;
+ using ::arm_compute::execute_window_loop;
+
+ Window window;
+
+ window.use_tensor_dimensions(tensor.info()->tensor_shape(), ::arm_compute::Window::DimY);
+ int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ int32_t width = _shape.dim(2);
+
+ Iterator it(&tensor, window);
+ execute_window_loop(window,
+ [&](const ::arm_compute::Coordinates &id) {
+ const auto z = id.z();
+ const auto y = id.y();
+ memcpy(it.ptr(), _base + z * height_width + y * width, width * sizeof(T));
+ },
+ it);
+ }
+
+private:
+ const nnfw::misc::tensor::Shape _shape;
+
+private:
+ const T *const _base;
+ const size_t _size;
+};
+
+#endif // __TENSOR3D_SOURCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/TensorSource.h b/runtime/contrib/pure_arm_compute/src/internal/TensorSource.h
new file mode 100644
index 000000000..114d3588e
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/TensorSource.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TensorSource.h
+ * @brief This file contains TensorSource class which is inherited from Source class
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_TENSOR_SOURCE_H__
+#define __INTERNAL_TENSOR_SOURCE_H__
+
+#include <misc/tensor/Shape.h>
+#include <misc/tensor/IndexIterator.h>
+
+#include "internal/Source.h"
+#include "internal/Swizzle.h"
+#include "internal/nnapi/tensor/Reader.h"
+#include "internal/arm_compute/tensor/View.h"
+
+// NOTE TensorSource is much slower than specialized Source(s)
+/**
+ * @brief Class to push general-shaped tensor data to an arm compute tensor
+ */
+template <typename T> class TensorSource final : public Source
+{
+public:
+ /**
+ * @brief Construct a new TensorSource object with params
+ * @param [in] shape Shape of tensor
+ * @param [in] base Base address
+ * @param [in] size Size of tensor
+ */
+ TensorSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size)
+ : _shape{shape}, _base{base}, _size{size}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Push tensor data to the given arm compute tensor
+ * @param [in] tensor Tensor object of arm compute to be filled
+ * @return N/A
+ */
+ void push(::arm_compute::ITensor &tensor) const override
+ {
+ const ::internal::nnapi::tensor::Reader<T> from{_shape, _base, _size};
+ ::internal::arm_compute::tensor::View<T> into{&tensor};
+
+ ::nnfw::misc::tensor::iterate(_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) {
+ const auto rank = index_nnapi.rank();
+ nnfw::misc::tensor::Index index_ACL(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ index_ACL.at(ToARMComputeAxis(rank, axis).value()) = index_nnapi.at(axis);
+ }
+
+ into.at(index_ACL) = from.at(index_nnapi);
+ };
+ }
+
+private:
+ const nnfw::misc::tensor::Shape _shape;
+ const T *const _base;
+ const size_t _size;
+};
+
+#endif // __INTERNAL_TENSOR_SOURCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/VectorSink.h b/runtime/contrib/pure_arm_compute/src/internal/VectorSink.h
new file mode 100644
index 000000000..a630ef1c1
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/VectorSink.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        VectorSink.h
+ * @brief       This file contains VectorSink class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_VECTOR_SINK_H__
+#define __INTERNAL_VECTOR_SINK_H__
+
+#include "internal/Sink.h"
+
+#include <arm_compute/core/ITensor.h>
+
+#include <cassert>
+
+/**
+ * @brief Class to store vector(1D) output data.
+ * This is for pulling the data out of another tensor.
+ * @tparam T Type of the data elements
+ */
+template <typename T> class VectorSink final : public Sink
+{
+public:
+ /**
+ * @brief Construct a VectorSink object
+ * @param[in] vlen Length of the vector
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ VectorSink(const int32_t vlen, T *base, const size_t size) : _vlen{vlen}, _base{base}
+ {
+ assert(size >= _vlen * sizeof(T));
+ }
+
+public:
+ /**
+ * @brief Pull the data into the internal structure
+ * @param[in] tensor The tensor which contains source data
+ * @return N/A
+ */
+ void pull(::arm_compute::ITensor &tensor) const override
+ {
+ for (int32_t n = 0; n < _vlen; ++n)
+ {
+ auto from = reinterpret_cast<T *>(tensor.ptr_to_element(::arm_compute::Coordinates{n}));
+ auto into = _base + n;
+
+ *into = *from;
+ }
+ }
+
+private:
+ const int32_t _vlen;
+ T *const _base;
+};
+
+#endif // __INTERNAL_VECTOR_SINK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/VectorSource.h b/runtime/contrib/pure_arm_compute/src/internal/VectorSource.h
new file mode 100644
index 000000000..48d3d3209
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/VectorSource.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        VectorSource.h
+ * @brief       This file contains VectorSource class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_VECTOR_SOURCE_H__
+#define __INTERNAL_VECTOR_SOURCE_H__
+
+#include "internal/Source.h"
+
+/**
+ * @brief Class to store vector(1D) input data.
+ * This is for pushing the data out to another tensor.
+ * @tparam T Type of the data elements
+ */
+template <typename T> class VectorSource final : public Source
+{
+public:
+ /**
+ * @brief Construct a VectorSource object
+ * @param[in] vlen Length of the vector
+ * @param[in] base Base pointer of the actual data
+ * @param[in] size Size of the data
+ */
+ VectorSource(const int32_t vlen, const T *base, const size_t size) : _vlen{vlen}, _base{base}
+ {
+ assert(size >= _vlen * sizeof(T));
+ }
+
+public:
+ /**
+ * @brief Push the data out to another tensor
+ * @param[out] tensor The tensor where the output data will be stored
+ * @return N/A
+ */
+ void push(::arm_compute::ITensor &tensor) const override
+ {
+ for (int32_t n = 0; n < _vlen; ++n)
+ {
+ auto from = _base + n;
+ auto into = reinterpret_cast<T *>(tensor.ptr_to_element(::arm_compute::Coordinates{n}));
+
+ *into = *from;
+ }
+ }
+
+private:
+ const int32_t _vlen;
+ const T *const _base;
+};
+
+#endif // __INTERNAL_VECTOR_SOURCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc
new file mode 100644
index 000000000..a7be2068d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/arm_compute.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <cassert>
+#include <cstdlib>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace operand
+{
+
+void Object::access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const
+{
+ if (::internal::arm_compute::isGpuMode())
+ {
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ auto cl_tensor = _tensor.get();
+ CAST_CL(cl_tensor)->map(queue);
+ fn(*_tensor);
+ CAST_CL(cl_tensor)->unmap(queue);
+ }
+ else
+ {
+ fn(*_tensor);
+ }
+}
+
+} // namespace operand
+} // namespace arm_compute
+} // namespace internal
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace operand
+{
+
+Context &Context::set(const ::internal::tflite::operand::Index &id,
+ const std::shared_ptr<::arm_compute::ITensor> &tensor)
+{
+ assert(_objects.find(id.asInt()) == _objects.end());
+
+ _objects[id.asInt()] = Object{tensor};
+ return (*this);
+}
+
+} // namespace operand
+} // namespace arm_compute
+} // namespace internal
+
+namespace internal
+{
+namespace arm_compute
+{
+
+bool isGpuMode()
+{
+ char *neon = std::getenv("NEON");
+ if (neon == nullptr)
+ return true;
+ else if (neon[0] == '1')
+ return false;
+ return true;
+}
+
+} // namespace arm_compute
+} // namespace internal
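
isGpuMode() reads the NEON environment variable once, so the backend is effectively chosen at process launch. A sketch of the resulting behavior and a typical initialization (init_backend is illustrative; CLScheduler::default_init() is the standard ACL entry point, used here as an assumption about caller responsibility):

    #include "internal/arm_compute.h"

    #include <arm_compute/runtime/CL/CLScheduler.h>

    // NEON unset   -> isGpuMode() == true  (OpenCL backend, CLTensor)
    // NEON=1       -> isGpuMode() == false (NEON backend, Tensor)
    // NEON=<other> -> isGpuMode() == true
    void init_backend(void)
    {
      if (internal::arm_compute::isGpuMode())
      {
        // The CL scheduler must be initialized once before any CL function runs
        ::arm_compute::CLScheduler::get().default_init();
      }
    }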
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h
new file mode 100644
index 000000000..fb6acaf81
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        arm_compute.h
+ * @brief       This file contains arm_compute library related classes
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_ARM_COMPUTE_H__
+#define __INTERNAL_ARM_COMPUTE_H__
+
+#include <arm_compute/core/ITensor.h>
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/Tensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace operand
+{
+
+/**
+ * @brief Class to access the tensor object
+ */
+class Object
+{
+public:
+ Object() = default;
+
+public:
+ Object(const std::shared_ptr<::arm_compute::ITensor> &tensor) : _tensor{tensor}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get the tensor pointer
+ * @return The tensor pointer
+ */
+ ::arm_compute::ITensor *ptr(void) const { return _tensor.get(); }
+
+private:
+ std::shared_ptr<::arm_compute::ITensor> _tensor;
+
+public:
+ /**
+ * @brief Access the tensor object and run the given function
+ : @param[in] fn The actual behavior when accessing the tensor object
+ * @return N/A
+ */
+ void access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const;
+};
+
+} // namespace operand
+} // namespace arm_compute
+} // namespace internal
+
+#include "internal/Model.h"
+
+#include <map>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace operand
+{
+
+/**
+ * @brief Class to manage Object instances
+ */
+class Context
+{
+public:
+ /**
+ * @brief Set index and tensor pair
+ * @param[in] ind The operand index
+ * @param[in] tensor The tensor object
+ * @return This object reference
+ */
+ Context &set(const ::internal::tflite::operand::Index &ind,
+ const std::shared_ptr<::arm_compute::ITensor> &tensor);
+
+public:
+ /**
+ * @brief Check whether a tensor exists for the given index
+ * @param[in] ind The operand Index
+ * @return @c true if the entry for ind exists, otherwise @c false
+ */
+ bool exist(const ::internal::tflite::operand::Index &ind) const
+ {
+ return _objects.find(ind.asInt()) != _objects.end();
+ }
+
+public:
+ /**
+ * @brief Lookup the tensor with the given index
+ * @param[in] ind The index as the key
+ * @return The object const reference
+ */
+ const Object &at(const ::internal::tflite::operand::Index &ind) const
+ {
+ return _objects.at(ind.asInt());
+ }
+
+ /**
+ * @brief Lookup the tensor with the given index
+ * @param[in] ind The index as the key
+ * @return The object reference
+ */
+ Object &at(const ::internal::tflite::operand::Index &ind) { return _objects.at(ind.asInt()); }
+
+private:
+ std::map<int, Object> _objects;
+};
+
+} // namespace operand
+} // namespace arm_compute
+} // namespace internal
+
+#include <arm_compute/runtime/IFunction.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace op
+{
+
+/**
+ * @brief Class to wrap IFunction
+ */
+class Step
+{
+public:
+ /**
+ * @brief Construct a Step object
+ * @param[in] func The compiled code to be executed
+ */
+ Step(std::unique_ptr<::arm_compute::IFunction> &&func) : _func{std::move(func)}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Run _func
+ * @return N/A
+ */
+ void run(void) const { _func->run(); }
+
+public:
+ /**
+ * @brief Get member @c _name
+ * @return The name as const reference
+ */
+ const std::string &name(void) const { return _name; }
+ /**
+ * @brief Get member @c _name
+ * @return The name as reference
+ */
+ std::string &name(void) { return _name; }
+
+private:
+ std::string _name;
+ std::unique_ptr<::arm_compute::IFunction> _func;
+#ifdef TFLITE_PROFILING_ENABLED
+public:
+ /**
+ * @brief Get member @c _op_index
+ * @return The operation index as value
+ */
+ int op_idx() const { return _op_idx; }
+ /**
+ * @brief Get member @c _op_index
+ * @return The operation index as reference
+ */
+ int &op_idx() { return _op_idx; }
+private:
+ int _op_idx;
+#endif
+};
+
+} // namespace op
+} // namespace arm_compute
+} // namespace internal
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace op
+{
+
+/**
+ * @brief Class managing compiled operation code Sequence
+ */
+class Sequence
+{
+public:
+ /**
+ * @brief Get size of sequence
+ * @return Number of sequence steps
+ */
+ uint32_t size(void) const { return _functions.size(); }
+
+public:
+ /**
+ * @brief Append a Function to the sequence
+ * @param[in] func Function to be appended
+ * @return This object reference
+ */
+ Sequence &append(std::unique_ptr<::arm_compute::IFunction> &&func)
+ {
+ _functions.emplace_back(std::move(func));
+ return (*this);
+ }
+
+public:
+ /**
+ * @brief Get the step entry on the index @c n
+ * @param[in] n The index
+ * @return The step object as reference
+ */
+ Step &at(uint32_t n) { return _functions.at(n); }
+ /**
+ * @brief Get the step entry on the index @c n
+ * @param[in] n The index
+ * @return The step object as const reference
+ */
+ const Step &at(uint32_t n) const { return _functions.at(n); }
+
+private:
+ // TODO Rename _functions as _steps
+ std::vector<Step> _functions;
+};
+
+} // namespace op
+} // namespace arm_compute
+} // namespace internal
+
+namespace internal
+{
+namespace arm_compute
+{
+
+/**
+ * @brief Class to manage compiled operation sequence
+ */
+class Plan
+{
+public:
+ /**
+ * @brief Construct a Plan object
+ * @param[in] model Model that we want to compile
+ */
+ Plan(const std::shared_ptr<const ::internal::tflite::Model> &model) : _model(model)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get the model object
+ * @return The model object as const reference
+ */
+ const ::internal::tflite::Model &model(void) const { return *_model; }
+
+public:
+ /**
+ * @brief Get operand context
+ * @return The operand context as reference
+ */
+ operand::Context &operands(void) { return _operands; }
+ /**
+ * @brief Get operand context
+ * @return The operand context as const reference
+ */
+ const operand::Context &operands(void) const { return _operands; }
+
+public:
+ /**
+ * @brief Get operation sequence
+ * @return The operation sequence as reference
+ */
+ op::Sequence &operations(void) { return _ops; }
+ /**
+ * @brief Get operation sequence
+ * @return The operation sequence as const reference
+ */
+ const op::Sequence &operations(void) const { return _ops; }
+
+private:
+ std::shared_ptr<const ::internal::tflite::Model> _model;
+ operand::Context _operands;
+ op::Sequence _ops;
+};
+
+} // namespace arm_compute
+} // namespace internal
+
+#include <arm_compute/core/ITensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+
+/**
+ * @brief Check if this runtime runs on GPU or NEON
+ * @return @c true if GPU mode, otherwise @c false
+ */
+bool isGpuMode();
+
+#define CAST_CL(tensor) static_cast<::arm_compute::CLTensor *>(tensor)
+#define CAST_NE(tensor) static_cast<::arm_compute::Tensor *>(tensor)
+
+} // namespace arm_compute
+} // namespace internal
+
+#endif // __INTERNAL_ARM_COMPUTE_H__
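
A usage sketch of Object::access() (dump_first_byte is an illustrative name; buffer() is the plain ACL ITensor accessor): the lambda runs with the CL buffer already mapped in GPU mode, so direct reads are safe under either backend.

    #include "internal/arm_compute.h"

    void dump_first_byte(const internal::arm_compute::operand::Object &obj)
    {
      obj.access([](::arm_compute::ITensor &tensor) {
        // access() maps/unmaps the CL buffer around this call in GPU mode,
        // so buffer() is directly dereferenceable here
        const uint8_t first = tensor.buffer()[0];
        (void)first;
      });
    }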
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.cc b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.cc
new file mode 100644
index 000000000..1a5c735ee
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/arm_compute/Cast.h"
+
+#include "internal/Swizzle.h"
+
+::arm_compute::Coordinates getARMComputeAxises(uint32_t rank)
+{
+ ::arm_compute::Coordinates res{};
+
+ res.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ res.set(axis, ToARMComputeAxis(rank, axis).value());
+ }
+
+ return res;
+}
+
+::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord,
+ const ::arm_compute::Coordinates &axises)
+{
+ ::arm_compute::Coordinates id{};
+ assert(runtime_coord.num_dimensions() == axises.num_dimensions());
+ for (size_t i = 0; i < runtime_coord.num_dimensions(); ++i)
+ {
+ id.set(axises[i], runtime_coord[i]);
+ }
+ return id;
+}
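+
+// Illustrative usage (a sketch; `runtime_coord` is a hypothetical element index in
+// runtime (NNAPI) dimension order):
+//
+//   const auto axises = getARMComputeAxises(4);
+//   const auto acl_coord = asARMComputeCoordinates(runtime_coord, axises);
+//   // acl_coord addresses the same element in ARM Compute's dimension ordering.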
+
+// Restructure runtime_permutationVector to ACL_permutationVector
+::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank,
+ const int32_t *runtime_pv)
+{
+ // rank up to 4 is supported
+ assert(rank <= 4);
+ assert(runtime_pv != nullptr);
+
+ int new_pv[4] = {0};
+ ::arm_compute::Coordinates axises = getARMComputeAxises(rank);
+
+ for (uint32_t i = 0; i < rank; ++i)
+ {
+ new_pv[axises[i]] = ToARMComputeAxis(rank, runtime_pv[i]).value();
+ }
+
+ ::arm_compute::PermutationVector ACL_PV =
+ ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]};
+ ACL_PV.set_num_dimensions(rank);
+
+ return ACL_PV;
+}
+
+::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape,
+ bool apply_dim_correction)
+{
+ const uint32_t rank = shape.rank();
+
+ ::arm_compute::TensorShape res{};
+
+ res.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ // NOTE In some cases, suppressing dimension correction is required.
+ // For example, input_size is 1 in LSTM. The input-to-input weights([num_units, input_size])
+ // of LSTM are reused as the weights of a FullyConnected layer.
+ // A FullyConnected weight tensor must have at least 2 dimensions.
+ // However, if dimension correction is applied to input_to_input_weights with input_size
+ // equal to 1, it is reduced to 1-D, so it can no longer serve as the FullyConnected
+ // weights.
+ res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction);
+ }
+
+ return res;
+}
+
+::arm_compute::DataType asDataType(const int32_t type)
+{
+ switch (type)
+ {
+ case ANEURALNETWORKS_FLOAT32:
+ case ANEURALNETWORKS_TENSOR_FLOAT32:
+ return ::arm_compute::DataType::F32;
+ case ANEURALNETWORKS_INT32:
+ case ANEURALNETWORKS_TENSOR_INT32:
+ return ::arm_compute::DataType::S32;
+ case ANEURALNETWORKS_UINT32:
+ return ::arm_compute::DataType::U32;
+ case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM:
+ return ::arm_compute::DataType::QASYMM8;
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
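+
+// For example, asDataType(ANEURALNETWORKS_TENSOR_FLOAT32) yields
+// ::arm_compute::DataType::F32, and asDataType(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM)
+// yields ::arm_compute::DataType::QASYMM8.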
+
+::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code)
+{
+ switch (code)
+ {
+ case ANEURALNETWORKS_FUSED_NONE:
+ return ::arm_compute::ActivationLayerInfo{};
+ case ANEURALNETWORKS_FUSED_RELU:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ case ANEURALNETWORKS_FUSED_RELU1:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
+ case ANEURALNETWORKS_FUSED_RELU6:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
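+
+// In short, the NNAPI fused activation codes map onto ACL as follows: RELU uses the
+// plain RELU function, while RELU1 and RELU6 both use LU_BOUNDED_RELU with the output
+// clamped to [-1, 1] and [0, 6] respectively.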
+
+::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset)
+{
+ return ::arm_compute::QuantizationInfo(scale, offset);
+}
+
+::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type,
+ const float scale, const int32_t zeroPoint)
+{
+ return ::arm_compute::TensorInfo(shape, 1, asDataType(type),
+ asQuantizationInfo(scale, zeroPoint));
+}
+
+::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape,
+ const ::arm_compute::DataType &type, const float scale,
+ const int32_t zeroPoint)
+{
+ return ::arm_compute::TensorInfo(shape, 1, type, asQuantizationInfo(scale, zeroPoint));
+}
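+
+// Illustrative usage (a sketch with made-up quantization parameters):
+//
+//   const auto shape = asTensorShape(operand_shape); // hypothetical operand shape
+//   const auto info =
+//       asTensorInfo(shape, ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, 0.0078125f, 128);
+//   // info now describes a QASYMM8 tensor with scale 1/128 and zero point 128.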
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h
new file mode 100644
index 000000000..211a6ac87
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/Cast.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Cast.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines casting functions from internal object to arm compute object
+ */
+#ifndef __ARM_COMPUTE_CAST_H__
+#define __ARM_COMPUTE_CAST_H__
+
+#include <arm_compute/core/Coordinates.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/Types.h>
+
+#include <NeuralNetworks.h>
+
+#include "internal/Model.h"
+
+/**
+ * @brief Generate arm compute coordinate object from rank
+ * @param[in] rank Rank number
+ * @return Coordinate object
+ */
+::arm_compute::Coordinates getARMComputeAxises(uint32_t rank);
+
+/**
+ * @brief Generate arm compute coordinate object from runtime coordinate object
+ * @param[in] runtime_coord Runtime coordinates object
+ * @param[in] axises Axis mapping from runtime coordinates to arm_compute coordinates
+ * @return Arm_compute coordinate object
+ */
+::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord,
+ const ::arm_compute::Coordinates &axises);
+
+/**
+ * @brief Generate arm compute permutation vector from runtime permutation vector
+ * @param[in] rank Rank number (up to 4 is supported)
+ * @param[in] runtime_pv Integer array for runtime permutation vector
+ * @return Permutation vector of arm compute
+ */
+::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank,
+ const int32_t *runtime_pv);
+/**
+ * @brief Cast from shape of internal to arm compute
+ * @param[in] shape Internal shape object
+ * @param[in] apply_dim_correction Flag to state whether apply dimension correction after setting
+ * one dimension in arm compute
+ * @return TensorShape object of arm compute
+ */
+::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape,
+ bool apply_dim_correction = true);
+
+/**
+ * @brief Cast from data type enum of NNAPI to arm compute
+ * @param[in] type NNAPI data type
+ * @return Data type of arm compute
+ */
+::arm_compute::DataType asDataType(const int32_t type);
+
+/**
+ * @brief Cast from NNAPI activation type enum to activation object of arm compute
+ * @param[in] code NNAPI activation type
+ * @return ActivationLayerInfo object of arm compute
+ */
+::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code);
+
+/**
+ * @brief Generate quantization info object of arm compute
+ * @param[in] scale Scale of quantization
+ * @param[in] offset Offset of quantization
+ * @return QuantizationInfo object of arm compute
+ */
+::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset);
+
+/**
+ * @brief Cast from internal tensor info to tensor info object of arm compute
+ * @param[in] shape Tensor shape
+ * @param[in] type Tensor type
+ * @param[in] scale Scale of tensor quantization
+ * @param[in] zeroPoint Zeropoint of tensor quantization
+ * @return TensorInfo object of arm compute
+ */
+::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type,
+ const float scale = 0.0f, const int32_t zeroPoint = 0);
+
+/**
+ * @brief Cast from internal tensor info to tensor info object of arm compute
+ * @param[in] shape Tensor shape
+ * @param[in] type Tensor type of arm compute
+ * @param[in] scale Scale of tensor quantization
+ * @param[in] zeroPoint Zeropoint of tensor quantization
+ * @return TensorInfo object of arm compute
+ */
+::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape,
+ const ::arm_compute::DataType &type, const float scale,
+ const int32_t zeroPoint);
+
+/**
+ * @brief Set value to arm compute tensor with casting
+ * @param[in] value Value to set
+ * @param[out] to Target tensor of arm compute
+ * @param[in] id Position of element
+ * @return N/A
+ */
+template <typename FromT>
+void copyCast(const FromT value, ::arm_compute::ITensor *to, const ::arm_compute::Coordinates &id)
+{
+ switch (to->info()->data_type())
+ {
+ case ::arm_compute::DataType::F32:
+ {
+ *reinterpret_cast<float *>(to->ptr_to_element(id)) = static_cast<float>(value);
+ break;
+ }
+ case ::arm_compute::DataType::S32:
+ {
+ *reinterpret_cast<int32_t *>(to->ptr_to_element(id)) = static_cast<int32_t>(value);
+ break;
+ }
+ case ::arm_compute::DataType::U32:
+ {
+ *reinterpret_cast<uint32_t *>(to->ptr_to_element(id)) = static_cast<uint32_t>(value);
+ break;
+ }
+ case ::arm_compute::DataType::QASYMM8:
+ {
+ float realValue = static_cast<float>(value);
+ // NOTE The rounding policy for quantization has not been decided yet,
+ // so a temporary value is used here.
+ *(to->ptr_to_element(id)) = to->info()->quantization_info().quantize(
+ realValue, ::arm_compute::RoundingPolicy::TO_ZERO);
+ break;
+ }
+ default:
+ throw std::runtime_error("Not supported, yet");
+ break;
+ }
+}
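+
+// Illustrative usage (a sketch; assumes `out` is an allocated, host-accessible
+// ARM Compute tensor and the coordinate is already in ACL ordering):
+//
+//   ::arm_compute::Coordinates id{0, 0, 0};
+//   copyCast(1.5f, out, id); // stored as float, int32_t, ... depending on out's type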
+
+#endif // __ARM_COMPUTE_CAST_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/feature/View.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/feature/View.h
new file mode 100644
index 000000000..c989ef4c2
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/feature/View.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::feature::View class
+ */
+#ifndef __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__
+#define __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__
+
+#include "misc/feature/Reader.h"
+
+#include <arm_compute/core/ITensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace feature
+{
+
+/**
+ * @brief Class to access feature's element
+ */
+template <typename T> class View final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Feature to support access
+ */
+ View(::arm_compute::ITensor *tensor) : _tensor{tensor}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get value of element in 3D feature using channel, row and column
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = feature_index_to_byte_offset(ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+ /**
+ * @brief Get value of element in 4D feature using batch, channel, row and column
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+public:
+ /**
+ * @brief Get reference of element in 3D feature using channel, row and column
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
+ T &at(uint32_t ch, uint32_t row, uint32_t col)
+ {
+ const auto offset = feature_index_to_byte_offset(ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+ /**
+ * @brief Get reference of element in 4D feature using batch, channel, row and column
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
+ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
+ {
+ const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+private:
+ /**
+ * @brief Get offset of element in 3D feature
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
+ size_t feature_index_to_byte_offset(uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ // ARM Compute uses CHW ordering
+ return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch});
+ }
+
+ /**
+ * @brief Get offset of element in 4D feature
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
+ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ // ARM Compute uses CHW ordering
+ return _tensor->info()->offset_element_in_bytes(
+ ::arm_compute::Coordinates{col, row, ch, batch});
+ }
+
+private:
+ ::arm_compute::ITensor *_tensor;
+};
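+
+// Illustrative usage (a sketch; assumes `tensor` is an allocated feature map in
+// ACL's CHW layout, already mapped for host access):
+//
+//   View<float> view{tensor};
+//   const float v = view.at(/* ch */ 0, /* row */ 1, /* col */ 2);
+//   view.at(0, 1, 2) = v * 2.0f;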
+
+} // namespace feature
+} // namespace arm_compute
+} // namespace internal
+
+#endif // __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/kernel/View.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/kernel/View.h
new file mode 100644
index 000000000..399cdf913
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/kernel/View.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::kernel::View class
+ */
+#ifndef __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__
+#define __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__
+
+#include "misc/kernel/Shape.h"
+#include "misc/kernel/Reader.h"
+
+#include <arm_compute/core/ITensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace kernel
+{
+
+/**
+ * @brief Class to access kernel's element
+ */
+template <typename T> class View final : public nnfw::misc::kernel::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Kernel to support access
+ */
+ View(::arm_compute::ITensor *tensor) : _tensor{tensor}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get value of element in kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ const auto offset = kernel_index_to_byte_offset(nth, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+public:
+ /**
+ * @brief Get reference of element in kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
+ T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col)
+ {
+ const auto offset = kernel_index_to_byte_offset(nth, ch, row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+private:
+ /**
+ * @brief Get offset of element in kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
+ size_t kernel_index_to_byte_offset(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch, nth});
+ }
+
+private:
+ ::arm_compute::ITensor *_tensor;
+};
+
+} // namespace kernel
+} // namespace arm_compute
+} // namespace internal
+
+#endif // __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/matrix/View.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/matrix/View.h
new file mode 100644
index 000000000..305fff729
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/matrix/View.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::matrix::View class
+ */
+#ifndef __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__
+#define __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__
+
+#include "misc/matrix/Shape.h"
+#include "misc/matrix/Reader.h"
+
+#include <arm_compute/core/ITensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace matrix
+{
+
+/**
+ * @brief Class to access matrix's element
+ */
+template <typename T> class View final : public nnfw::misc::matrix::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Matrix to support access
+ */
+ View(::arm_compute::ITensor *tensor) : _tensor{tensor}
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Get value of element in matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t row, uint32_t col) const override
+ {
+ const auto offset = matrix_index_to_byte_offset(row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+public:
+ /**
+ * @brief Get reference of element in matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
+ T &at(uint32_t row, uint32_t col)
+ {
+ const auto offset = matrix_index_to_byte_offset(row, col);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+private:
+ /**
+ * @brief Get offset of element in matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Offset of element
+ */
+ size_t matrix_index_to_byte_offset(uint32_t row, uint32_t col) const
+ {
+ return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row});
+ }
+
+private:
+ ::arm_compute::ITensor *_tensor;
+};
+
+} // namespace matrix
+} // namespace arm_compute
+} // namespace internal
+
+#endif // __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/arm_compute/tensor/View.h b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/tensor/View.h
new file mode 100644
index 000000000..372bd682d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/arm_compute/tensor/View.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::arm_compute::tensor::View class
+ */
+#ifndef __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
+#define __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
+
+#include "misc/tensor/Shape.h"
+#include "misc/tensor/Index.h"
+
+#include <arm_compute/core/ITensor.h>
+
+namespace internal
+{
+namespace arm_compute
+{
+namespace tensor
+{
+
+/**
+ * @brief Class to access tensor's element
+ */
+template <typename T> class View
+{
+public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] tensor Tensor to support access
+ */
+ View(::arm_compute::ITensor *tensor) : _tensor{tensor}
+ {
+ // DO NOTHING
+ }
+
+private:
+ /**
+ * @brief Get offset of element in tensor
+ * @param[in] index Index of element
+ * @return Offset of element
+ */
+ uint32_t byte_offset_of(const nnfw::misc::tensor::Index &index) const
+ {
+ // NOTE index.rank() >= _tensor->info()->num_dimensions() should hold here
+ const uint32_t rank = index.rank();
+
+ ::arm_compute::Coordinates coordinates;
+
+ coordinates.set_num_dimensions(rank);
+
+ for (uint32_t axis = 0; axis < rank; ++axis)
+ {
+ coordinates[axis] = index.at(axis);
+ }
+
+ return _tensor->info()->offset_element_in_bytes(coordinates);
+ }
+
+public:
+ /**
+ * @brief Get value of element in tensor
+ * @param[in] index Index of element
+ * @return Value of element
+ */
+ T at(const nnfw::misc::tensor::Index &index) const
+ {
+ const auto offset = byte_offset_of(index);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+ /**
+ * @brief Get reference of element in tensor
+ * @param[in] index Index of element
+ * @return Reference of element
+ */
+ T &at(const nnfw::misc::tensor::Index &index)
+ {
+ const auto offset = byte_offset_of(index);
+
+ T *ptr = reinterpret_cast<T *>(_tensor->buffer() + offset);
+
+ return *ptr;
+ }
+
+private:
+ ::arm_compute::ITensor *_tensor;
+};
+
+} // namespace tensor
+} // namespace arm_compute
+} // namespace internal
+
+#endif // __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Reader.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Reader.h
new file mode 100644
index 000000000..ac25692a1
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Reader.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Reader.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::feature::Reader
+ */
+#ifndef __INTERNAL_NNAPI_FEATURE_READER_H__
+#define __INTERNAL_NNAPI_FEATURE_READER_H__
+
+#include "internal/nnapi/feature/Utils.h"
+
+#include "misc/feature/Reader.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace feature
+{
+
+/**
+ * @brief Class to support reading element in feature(3D, 4D)
+ */
+template <typename T> class Reader final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new Reader object
+ * @param[in] shape Shape of feature
+ * @param[in] ptr Pointer to feature data
+ * @param[in] len Size of feature data (in bytes)
+ */
+ // NOTE The parameter len denotes the number of bytes.
+ Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len)
+ : _shape{shape}, _ptr{ptr}
+ {
+ assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
+ }
+
+public:
+ /**
+ * @brief Get shape of feature
+ * @return Shape of feature
+ */
+ const nnfw::misc::feature::Shape &shape(void) const { return _shape; }
+
+public:
+ /**
+ * @brief Get value of element using channel, row, and column index for 3D feature
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ uint32_t index = index_of(_shape, ch, row, col);
+
+ return _ptr[index];
+ }
+
+ /**
+ * @brief Get value of element using batch, channel, row, and column index for 4D feature
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ uint32_t index = index_of(_shape, batch, ch, row, col);
+
+ return _ptr[index];
+ }
+
+private:
+ nnfw::misc::feature::Shape _shape;
+
+private:
+ const T *_ptr;
+};
+
+} // namespace feature
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_FEATURE_READER_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Utils.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Utils.h
new file mode 100644
index 000000000..ee59d217e
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/Utils.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Utils.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines utility functions used in internal::nnapi::feature namespace
+ */
+#ifndef __INTERNAL_NNAPI_FEATURE_UTILS_H__
+#define __INTERNAL_NNAPI_FEATURE_UTILS_H__
+
+#include "misc/feature/Shape.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace feature
+{
+
+/**
+ * @brief Get position of element using channel, row, and column for 3D feature
+ * @param[in] shape Shape of feature
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Position of element
+ */
+inline uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t ch, uint32_t row,
+ uint32_t col)
+{
+ uint32_t res = 0;
+
+ // NNAPI uses NHWC ordering
+ res += row * shape.W * shape.C;
+ res += col * shape.C;
+ res += ch;
+
+ return res;
+}
+
+/**
+ * @brief Get position of element using batch, channel, row, and column for 4D feature
+ * @param[in] shape Shape of feature
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Position of element
+ */
+inline uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t batch, uint32_t ch,
+ uint32_t row, uint32_t col)
+{
+ uint32_t res = 0;
+
+ // NNAPI uses NHWC ordering
+ res += batch * shape.H * shape.W * shape.C;
+ res += row * shape.W * shape.C;
+ res += col * shape.C;
+ res += ch;
+
+ return res;
+}
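+
+// For example, with shape {N = 1, H = 2, W = 3, C = 4}, the element at
+// (batch = 0, ch = 1, row = 1, col = 2) maps to index
+// 0 * (2 * 3 * 4) + 1 * (3 * 4) + 2 * 4 + 1 = 21 in NHWC order.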
+
+} // namespace feature
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_FEATURE_UTILS_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/View.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/View.h
new file mode 100644
index 000000000..965e42f1c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/feature/View.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::feature::View class
+ */
+#ifndef __INTERNAL_NNAPI_FEATURE_VIEW_H__
+#define __INTERNAL_NNAPI_FEATURE_VIEW_H__
+
+#include "internal/nnapi/feature/Utils.h"
+
+#include "misc/feature/Reader.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace feature
+{
+
+/**
+ * @brief Class to access feature's element information using index
+ */
+template <typename T> class View final : public nnfw::misc::feature::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] shape Shape of feature
+ * @param[in] ptr Pointer to feature data
+ * @param[in] len Size of feature data (in bytes)
+ */
+ // NOTE The parameter len denotes the number of bytes.
+ View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr}
+ {
+ assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
+ }
+
+public:
+ /**
+ * @brief Get shape of feature
+ * @return Shape of feature
+ */
+ const nnfw::misc::feature::Shape &shape(void) const { return _shape; }
+
+public:
+ /**
+ * @brief Get value of element in 3D feature using channel, row, and column index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ uint32_t index = index_of(_shape, ch, row, col);
+
+ return _ptr[index];
+ }
+
+ /**
+ * @brief Get value of element in 4D feature using batch, channel, row and column index
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ uint32_t index = index_of(_shape, batch, ch, row, col);
+
+ return _ptr[index];
+ }
+
+ /**
+ * @brief Get reference of element in 3D feature using channel, row, and column index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
+ T &at(uint32_t ch, uint32_t row, uint32_t col)
+ {
+ uint32_t index = index_of(_shape, ch, row, col);
+
+ return _ptr[index];
+ }
+
+ /**
+ * @brief Get reference of element in 4D feature using batch, channel, row and column index
+ * @param[in] batch Batch index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Reference of element
+ */
+ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
+ {
+ uint32_t index = index_of(_shape, batch, ch, row, col);
+
+ return _ptr[index];
+ }
+
+private:
+ nnfw::misc::feature::Shape _shape;
+
+private:
+ T *_ptr;
+};
+
+} // namespace feature
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_FEATURE_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/kernel/Reader.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/kernel/Reader.h
new file mode 100644
index 000000000..ae964f74c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/kernel/Reader.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Reader.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::kernel::Reader class
+ */
+#ifndef __INTERNAL_NNAPI_KERNEL_READER_H__
+#define __INTERNAL_NNAPI_KERNEL_READER_H__
+
+#include "misc/kernel/Shape.h"
+#include "misc/kernel/Reader.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace kernel
+{
+
+/**
+ * @brief Class to support reading element in kernel
+ */
+template <typename T> class Reader final : public nnfw::misc::kernel::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new Reader object
+ * @param[in] shape Shape of kernel
+ * @param[in] ptr Pointer to kernel data
+ * @param[in] len Size of kernel (byte)
+ */
+ // NOTE The parameter len denotes the number of bytes.
+ Reader(const ::nnfw::misc::kernel::Shape &shape, const T *ptr, size_t len)
+ : _shape{shape}, _ptr{ptr}
+ {
+ assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
+ }
+
+public:
+ /**
+ * @brief Get shape of kernel
+ * @return Shape of kernel
+ */
+ const nnfw::misc::kernel::Shape &shape(void) const { return _shape; }
+
+public:
+ /**
+ * @brief Get value of element for kernel
+ * @param[in] nth Kernel index
+ * @param[in] ch Channel index
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ // NNAPI uses NHWC ordering
+ uint32_t index = 0;
+
+ index += nth * _shape.H * _shape.W * _shape.C;
+ index += row * _shape.W * _shape.C;
+ index += col * _shape.C;
+ index += ch;
+
+ return _ptr[index];
+ }
+
+private:
+ nnfw::misc::kernel::Shape _shape;
+
+private:
+ const T *_ptr;
+};
+
+} // namespace kernel
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_KERNEL_READER_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/matrix/Reader.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/matrix/Reader.h
new file mode 100644
index 000000000..f03a4be31
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/matrix/Reader.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Reader.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::matrix::Reader class
+ */
+#ifndef __INTERNAL_NNAPI_MATRIX_READER_H__
+#define __INTERNAL_NNAPI_MATRIX_READER_H__
+
+#include "misc/matrix/Shape.h"
+#include "misc/matrix/Reader.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace matrix
+{
+
+/**
+ * @brief Class to support reading element in matrix
+ */
+template <typename T> class Reader final : public nnfw::misc::matrix::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a new Reader object
+ * @param[in] shape Shape of matrix
+ * @param[in] ptr Pointer to matrix data
+ * @param[in] len Size of matrix (byte)
+ */
+ // NOTE The parameter len denotes the number of bytes.
+ Reader(const ::nnfw::misc::matrix::Shape &shape, const T *ptr, size_t len)
+ : _shape{shape}, _ptr{ptr}
+ {
+ assert(shape.H * shape.W * sizeof(T) == len);
+ }
+
+public:
+ /**
+ * @brief Get shape of matrix
+ * @return Shape of matrix
+ */
+ const nnfw::misc::matrix::Shape &shape(void) const { return _shape; }
+
+public:
+ /**
+ * @brief Get value of element for matrix
+ * @param[in] row Row index
+ * @param[in] col Column index
+ * @return Value of element
+ */
+ T at(uint32_t row, uint32_t col) const override
+ {
+ // NNAPI stores matrices in row-major order
+ uint32_t index = 0;
+
+ index += row * _shape.W;
+ index += col;
+
+ return _ptr[index];
+ }
+
+private:
+ nnfw::misc::matrix::Shape _shape;
+
+private:
+ const T *_ptr;
+};
+
+} // namespace matrix
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_MATRIX_READER_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h
new file mode 100644
index 000000000..6a3fff646
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        ConstView.h
+ * @brief       This file contains ConstView class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__
+#define __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__
+
+#include "util/tensor/Shape.h"
+#include "util/tensor/Index.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace tensor
+{
+
+/**
+ * @brief Wrapper class to read tensor values
+ * @tparam T The tensor element type
+ */
+template <typename T> class ConstView
+{
+public:
+ /**
+ * @brief Construct a ConstView class
+ * @param[in] shape Tensor shape
+ * @param[in] ptr The base pointer of actual data
+ * @param[in] len The number of bytes
+ */
+ ConstView(const ::nnfw::misc::tensor::Shape &shape, const uint8_t *ptr, size_t len)
+ : _shape{shape}, _ptr{ptr}, _len{len}
+ {
+ // DO NOTHING
+ }
+
+public:
+ const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
+
+private:
+ // TODO Make this as a helper function, and share it for both View<T> and ConstView<T>
+ /**
+ * @brief Calculate offset for the given tensor index
+ * @param[in] index Tensor index
+ * @return The calculated offset
+ */
+ uint32_t offset_of(const nnfw::misc::tensor::Index &index) const
+ {
+ if (_shape.rank() == 0)
+ {
+ return 0;
+ }
+
+ uint32_t offset = index.at(0);
+
+ // Stride decreases as axis increases in NNAPI
+ for (uint32_t axis = 1; axis < _shape.rank(); ++axis)
+ {
+ offset *= _shape.dim(axis);
+ offset += index.at(axis);
+ }
+
+ return offset;
+ }
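+
+ // For example, a rank-3 tensor with dimensions {2, 3, 4} flattens index (1, 2, 3)
+ // to (1 * 3 + 2) * 4 + 3 = 23, i.e. the usual row-major (NNAPI) layout.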
+
+public:
+ /**
+ * @brief Get the value on the given index
+ * @param[in] index Flattened tensor index
+ * @return The value on the given index
+ */
+ T at(const nnfw::misc::tensor::Index &index) const
+ {
+ const auto offset = offset_of(index);
+
+ const T *arr = reinterpret_cast<const T *>(_ptr);
+
+ return arr[offset];
+ }
+
+private:
+ const nnfw::misc::tensor::Shape _shape;
+
+private:
+ const uint8_t *const _ptr;
+ const size_t _len;
+};
+
+} // namespace tensor
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/Reader.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/Reader.h
new file mode 100644
index 000000000..fc6d490da
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/Reader.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file        Reader.h
+ * @brief       This file contains Reader class
+ * @ingroup     COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_NNAPI_TENSOR_READER_H__
+#define __INTERNAL_NNAPI_TENSOR_READER_H__
+
+#include <vector>
+#include "misc/tensor/Reader.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace tensor
+{
+
+/**
+ * @brief Wrapper class to read tensor values
+ * @tparam T The tensor element type
+ */
+template <typename T> class Reader final : public nnfw::misc::tensor::Reader<T>
+{
+public:
+ /**
+ * @brief Construct a Reader class
+ * @param[in] shape Tensor shape
+ * @param[in] ptr The base pointer of actual data
+ * @param[in] len The number of bytes
+ */
+ Reader(const ::nnfw::misc::tensor::Shape &shape, const T *ptr, size_t len)
+ : _shape{shape}, _ptr{ptr}
+ {
+ assert(shape.num_elements() * sizeof(T) == len);
+ initialize();
+ }
+
+public:
+ /**
+ * @brief Get shape object
+ * @return The shape as const reference
+ */
+ const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
+
+public:
+ /**
+ * @brief Get the value on the given index
+ * @param[in] index_nnapi Flattened tensor index
+ * @return The value on the given index
+ */
+ T at(const nnfw::misc::tensor::Index &index_nnapi) const override
+ {
+ uint32_t offset = 0;
+
+ for (int i = 0; i < _shape.rank(); i++)
+ offset += index_nnapi.at(i) * _stridess.at(i);
+
+ return _ptr[offset];
+ }
+
+private:
+ /**
+ * @brief Initializes @c _stridess
+ * @return N/A
+ * @note Assuming that the shape is [d4, .., d1] and the data is stored at pointer ptr,
+ *       the offset of index [i4, .., i1] is calculated as follows:
+ *         offset = i4 * (d3 * d2 * d1) +
+ *                  i3 * (d2 * d1) +
+ *                  i2 * (d1) +
+ *                  i1
+ *       The partial products (d3 * d2 * d1), (d2 * d1), ... would be recomputed every
+ *       time an offset is calculated. To avoid this repetition, _stridess[n] caches
+ *       _shape.dim(n+1) * _shape.dim(n+2) * ... * _shape.dim(rank - 1).
+ */
+ void initialize(void)
+ {
+ for (int r = 0; r < _shape.rank(); r++)
+ {
+ int elem_count = 1;
+ for (int k = r + 1; k < _shape.rank(); k++)
+ elem_count *= _shape.dim(k);
+ _stridess.emplace_back(elem_count);
+ }
+ }
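+
+ // For example, a tensor of shape {2, 3, 4} yields _stridess = {12, 4, 1}, so index
+ // (1, 2, 3) resolves to offset 1 * 12 + 2 * 4 + 3 * 1 = 23.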
+
+private:
+ nnfw::misc::tensor::Shape _shape;
+
+private:
+ const T *_ptr;
+ std::vector<int32_t> _stridess;
+};
+
+} // namespace tensor
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_TENSOR_READER_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/View.h b/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/View.h
new file mode 100644
index 000000000..4766851b9
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/nnapi/tensor/View.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file View.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::nnapi::tensor::View class
+ */
+#ifndef __INTERNAL_NNAPI_TENSOR_VIEW_H__
+#define __INTERNAL_NNAPI_TENSOR_VIEW_H__
+
+#include "misc/tensor/Shape.h"
+#include "misc/tensor/Index.h"
+
+namespace internal
+{
+namespace nnapi
+{
+namespace tensor
+{
+
+/**
+ * @brief Class to access tensor's element information using index
+ */
+template <typename T> class View
+{
+public:
+ /**
+ * @brief Construct a new View object
+ * @param[in] shape Shape of tensor
+ * @param[in] ptr Pointer to tensor data
+ * @param[in] len Size of tensor (byte)
+ */
+ // NOTE The parameter len denotes the number of bytes.
+ View(const ::nnfw::misc::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr}
+ {
+ assert(shape.num_elements() * sizeof(T) == len);
+ }
+
+public:
+ /**
+ * @brief Get shape of tensor
+ * @return Shape of tensor
+ */
+ const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
+
+private:
+ /**
+ * @brief Get position of element using index in tensor
+ * @param[in] index Index of element
+ * @return Position of element
+ */
+ uint32_t offset_of(const nnfw::misc::tensor::Index &index) const
+ {
+ if (_shape.rank() == 0)
+ {
+ return 0;
+ }
+
+ uint32_t offset = index.at(0);
+
+ // Stride decreases as axis increases in NNAPI
+ for (uint32_t axis = 1; axis < _shape.rank(); ++axis)
+ {
+ offset *= _shape.dim(axis);
+ offset += index.at(axis);
+ }
+
+ return offset;
+ }
+
+public:
+ /**
+ * @brief Get value of element at index
+ * @param[in] index Index of element
+ * @return Value of element at index
+ */
+ T at(const nnfw::misc::tensor::Index &index) const
+ {
+ const auto offset = offset_of(index);
+
+ return _ptr[offset];
+ }
+
+ /**
+ * @brief Get reference of element at index
+ * @param[in] index Index of element
+ * @return Reference of element at index
+ */
+ T &at(const nnfw::misc::tensor::Index &index)
+ {
+ const auto offset = offset_of(index);
+
+ return _ptr[offset];
+ }
+
+private:
+ nnfw::misc::tensor::Shape _shape;
+
+private:
+ T *_ptr;
+};
+
+} // namespace tensor
+} // namespace nnapi
+} // namespace internal
+
+#endif // __INTERNAL_NNAPI_TENSOR_VIEW_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Abs.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Abs.cc
new file mode 100644
index 000000000..e23a9538c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Abs.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Abs.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Abs
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Abs
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Abs
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+ input_index = inputs[0];
+}
+
+} // namespace Abs
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Abs.h b/runtime/contrib/pure_arm_compute/src/internal/op/Abs.h
new file mode 100644
index 000000000..0be8b0205
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Abs.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_ABS_H__
+#define __INTERNAL_OP_ABS_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Abs
+{
+
+struct Param
+{
+ int32_t output_index;
+ int32_t input_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Abs
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_ABS_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Add.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Add.cc
new file mode 100644
index 000000000..52803261f
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Add.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Add.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Add
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Add
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Add
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ // 2 -> Activation Index
+ lhs_index = inputs[0];
+ rhs_index = inputs[1];
+ activation_index = inputs[2];
+}
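+
+// Illustrative NNAPI call producing the layout above (a sketch; `model` and the index
+// variables are hypothetical):
+//
+//   uint32_t inputs[3] = {lhs, rhs, act}; // act: e.g. an ANEURALNETWORKS_FUSED_NONE operand
+//   uint32_t outputs[1] = {ofm};
+//   ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_ADD, 3, inputs, 1, outputs);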
+
+} // namespace Add
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Add.h b/runtime/contrib/pure_arm_compute/src/internal/op/Add.h
new file mode 100644
index 000000000..a7804a569
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Add.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Add.h
+ * @brief This file contains accept function and params for Add operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_ADD_H__
+#define __INTERNAL_OP_ADD_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Add
+{
+
+/**
+ * @brief Struct of Add operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t lhs_index; /**< Left hand side index */
+ int32_t rhs_index; /**< Right hand side index */
+ int32_t activation_index; /**< Activation index */
+
+ /**
+ * @brief Construct a new Param object for Add as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Add with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Add
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for Add with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for Add
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for Add
+ * @return Parameters of Add
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for Add
+ * @param [in] v Node visitor for invoking visit function of Add
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Add
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_ADD_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.cc
new file mode 100644
index 000000000..485430377
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/op/ArgMax.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ArgMax
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ArgMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ArgMax
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
+
+} // namespace ArgMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
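
All of these Param constructors share one calling convention: they receive the raw NN API operand-index arrays and assert the expected arity before unpacking named indexes. A hypothetical call site for the ArgMax form above (the operand numbers are invented for illustration):

#include "internal/op/ArgMax.h"

// The uint32_t arrays mirror the inputs/outputs arguments that
// ANeuralNetworksModel_addOperation() hands down to these constructors.
void build_argmax_node()
{
  const uint32_t inputs[] = {0 /* input tensor */, 1 /* axis tensor */};
  const uint32_t outputs[] = {2 /* output tensor */};

  internal::tflite::op::ArgMax::Param param(2, inputs, 1, outputs);
  // Now: param.ifm_index == 0, param.axis_index == 1, param.ofm_index == 2
  internal::tflite::op::ArgMax::Node node(param);
  (void)node; // a real caller would hand the node to the model builder
}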
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.h b/runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.h
new file mode 100644
index 000000000..780af2232
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ArgMax.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_ARGMAX_H__
+#define __INTERNAL_OP_ARGMAX_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ArgMax
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+ int32_t axis_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ArgMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_ARGMAX_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.cc b/runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.cc
new file mode 100644
index 000000000..ae4c9411e
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/AvgPool2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace AvgPool2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace AvgPool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace AvgPool2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 10 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_left_index = inputs[1];
+ padding_right_index = inputs[2];
+ padding_top_index = inputs[3];
+ padding_bottom_index = inputs[4];
+ hstride_index = inputs[5];
+ vstride_index = inputs[6];
+ kw_index = inputs[7];
+ kh_index = inputs[8];
+ activation_index = inputs[9];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 7 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_index = inputs[1];
+ hstride_index = inputs[2];
+ vstride_index = inputs[3];
+ kw_index = inputs[4];
+ kh_index = inputs[5];
+ activation_index = inputs[6];
+}
+
+} // namespace Implicit
+} // namespace AvgPool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
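
The Explicit/Implicit namespaces mirror the two operand layouts NN API allows for pooling: a 10-input form that spells out all four pad amounts, and a 7-input form that passes a single padding code instead. A hypothetical sketch of both call sites (operand numbering invented for illustration):

#include "internal/op/AvgPool2D.h"

void build_avgpool_params()
{
  // 10-input (explicit) form: four pad amounts are separate operands.
  const uint32_t exp_in[] = {0,          // IFM
                             1, 2, 3, 4, // pad left / right / top / bottom
                             5, 6,       // stride width / height
                             7, 8,       // filter width / height
                             9};         // fused activation
  const uint32_t exp_out[] = {10};
  internal::tflite::op::AvgPool2D::Explicit::Param ep(10, exp_in, 1, exp_out);

  // 7-input (implicit) form: one padding-code operand (SAME or VALID).
  const uint32_t imp_in[] = {0,    // IFM
                             1,    // padding code
                             2, 3, // stride width / height
                             4, 5, // filter width / height
                             6};   // fused activation
  const uint32_t imp_out[] = {7};
  internal::tflite::op::AvgPool2D::Implicit::Param ip(7, imp_in, 1, imp_out);
  (void)ep;
  (void)ip;
}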
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.h b/runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.h
new file mode 100644
index 000000000..cf9061ca9
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/AvgPool2D.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file AvgPool2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::AvgPool2D Param structs
+ * and internal::tflite::op::AvgPool2D Node classes
+ */
+#ifndef __INTERNAL_OP_AVG_POOL_2D_H__
+#define __INTERNAL_OP_AVG_POOL_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace AvgPool2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_left_index; /**< Index of padding left */
+ int32_t padding_right_index; /**< Index of padding right */
+ int32_t padding_top_index; /**< Index of padding top */
+ int32_t padding_bottom_index; /**< Index of padding bottom */
+
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_index; /**< Index of padding */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Implicit
+} // namespace AvgPool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_AVG_POOL_2D_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc b/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
new file mode 100644
index 000000000..0768039d0
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc
@@ -0,0 +1,63 @@
+/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/op/BatchToSpaceNd.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+}
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
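
For readers unfamiliar with the operator itself: BatchToSpaceND moves block_h * block_w slices of the batch dimension back into the spatial dimensions, undoing SpaceToBatchND. A scalar NHWC reference written from the TFLite operator definition (an illustration, not this patch's kernel; the block size is passed here as two scalars rather than read from the block-size tensor operand):

#include <vector>

// in has shape [B, H, W, C]; the result has shape [B/(bh*bw), H*bh, W*bw, C].
std::vector<float> batch_to_space(const std::vector<float> &in,
                                  int B, int H, int W, int C, int bh, int bw)
{
  const int Bo = B / (bh * bw);
  std::vector<float> out(in.size());
  for (int b = 0; b < Bo; ++b)
    for (int h = 0; h < H * bh; ++h)
      for (int w = 0; w < W * bw; ++w)
        for (int c = 0; c < C; ++c)
        {
          const int i = h % bh, j = w % bw;       // position inside the block
          const int in_b = (i * bw + j) * Bo + b; // batch slice holding that position
          const int in_idx = ((in_b * H + h / bh) * W + w / bw) * C + c;
          const int out_idx = ((b * (H * bh) + h) * (W * bw) + w) * C + c;
          out[out_idx] = in[in_idx];
        }
  return out;
}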
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.h b/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.h
new file mode 100644
index 000000000..a514cb44c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/BatchToSpaceNd.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_BATCHTOSPACE_ND_H__
+#define __INTERNAL_OP_BATCHTOSPACE_ND_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+ int32_t block_size_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace BatchToSpaceNd
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace BatchToSpaceNd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_BATCHTOSPACE_ND_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Cast.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Cast.cc
new file mode 100644
index 000000000..13f58f137
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Cast.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Cast.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Cast
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Cast
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Cast
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ input_index = inputs[0];
+}
+
+} // namespace Cast
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Cast.h b/runtime/contrib/pure_arm_compute/src/internal/op/Cast.h
new file mode 100644
index 000000000..8af741a16
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Cast.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Cast.h
+ * @brief This file contains the accept function and parameters for the Cast operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_CAST_H__
+#define __INTERNAL_OP_CAST_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Cast
+{
+
+/**
+ * @brief Struct of Cast operation's param
+ */
+struct Param
+{
+ int32_t output_index; /**< Output index */
+
+ int32_t input_index; /**< Input index */
+
+ /**
+ * @brief Construct a new Param object for Cast as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Cast with params
+ * @param [in] inputCount The number of inputs
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of outputs
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Cast
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for Cast with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for Cast
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for Cast
+ * @return Parameters of Cast
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for Cast
+ * @param [in] v Node visitor for invoking visit function of Cast
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Cast
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_CAST_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Concat.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Concat.cc
new file mode 100644
index 000000000..ee1730051
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Concat.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Concat.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Concat
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Concat
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Concat
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // When there are N + 1 inputs, each input should be interpreted as follows:
+ //
+ // [0, N) -> Input tensors
+ // N -> Axis
+ axis_index = inputs[inputCount - 1];
+
+ for (uint32_t n = 0; n < inputCount - 1; ++n)
+ {
+ ifm_indexes.emplace_back(inputs[n]);
+ }
+}
+
+} // namespace Concat
+} // namespace op
+} // namespace tflite
+} // namespace internal
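
Concat is the one variadic op in this set: only the output count is asserted, the final input is always the axis, and everything before it is a tensor to concatenate. A hypothetical call site (operand numbering invented for illustration):

#include "internal/op/Concat.h"

void build_concat_param()
{
  // Three tensors plus the trailing axis scalar, so inputCount is N + 1 = 4.
  const uint32_t inputs[] = {0, 1, 2, /* axis */ 3};
  const uint32_t outputs[] = {4};

  internal::tflite::op::Concat::Param param(4, inputs, 1, outputs);
  // Now: param.ifm_indexes == {0, 1, 2}, param.axis_index == 3,
  //      param.ofm_index == 4
}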
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Concat.h b/runtime/contrib/pure_arm_compute/src/internal/op/Concat.h
new file mode 100644
index 000000000..207f964fb
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Concat.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Concat.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines the Concat node
+ */
+
+#ifndef __INTERNAL_OP_CONCAT_H__
+#define __INTERNAL_OP_CONCAT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Concat
+{
+
+/**
+ * @brief Struct to manipulate parameter for Concat operation
+ */
+struct Param
+{
+ int32_t ofm_index; //!< index for output
+
+ std::vector<int32_t> ifm_indexes; //!< indexes for inputs
+ int32_t axis_index; //!< index for axis
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Concat Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Concat Node object
+ * @param param Parameter for Concat Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Concat node
+};
+
+} // namespace Concat
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_CONCAT_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.cc
new file mode 100644
index 000000000..a24d14632
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Conv2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Conv2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace Conv2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Conv2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 10 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Kernel Tensor Index
+ // 2 -> Bias Tensor Index
+ // 3 -> Padding_left index
+ // 4 -> Padding_right index
+ // 5 -> Padding_top index
+ // 6 -> Padding_bottom index
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Activation Index
+ ifm_index = inputs[0];
+ ker_index = inputs[1];
+ bias_index = inputs[2];
+ padding_left_index = inputs[3];
+ padding_right_index = inputs[4];
+ padding_top_index = inputs[5];
+ padding_bottom_index = inputs[6];
+ hstride_index = inputs[7];
+ vstride_index = inputs[8];
+ activation_index = inputs[9];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 7 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Kernel Tensor Index
+ // 2 -> Bias Tensor Index
+ // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 4 -> Stride (width) Index
+ // 5 -> Stride (height) Index
+ // 6 -> Activation Index
+ ifm_index = inputs[0];
+ ker_index = inputs[1];
+ bias_index = inputs[2];
+ padding_index = inputs[3];
+ hstride_index = inputs[4];
+ vstride_index = inputs[5];
+ activation_index = inputs[6];
+}
+
+} // namespace Implicit
+} // namespace Conv2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
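
The implicit Conv2D form stores only the padding-code operand; the actual pad amounts are computed later, when the backend resolves SAME or VALID against the tensor shapes. For reference, the standard arithmetic for that resolution (the usual formulas, not code from this patch):

#include <algorithm>

struct Padding
{
  int before;
  int after;
};

// SAME keeps ceil(in_size / stride) output elements and splits the needed
// padding as evenly as possible, with any odd remainder going after.
Padding same_padding(int in_size, int stride, int filter_size)
{
  const int out_size = (in_size + stride - 1) / stride;
  const int total = std::max(0, (out_size - 1) * stride + filter_size - in_size);
  return {total / 2, total - total / 2};
}

Padding valid_padding() { return {0, 0}; } // VALID never pads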
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.h b/runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.h
new file mode 100644
index 000000000..de46fbb9c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Conv2D.h
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Conv2D.h
+ * @brief This file contains the accept function and parameters for the Conv2D operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_CONV_2D_H__
+#define __INTERNAL_OP_CONV_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Conv2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct of Conv2D(explicit) operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+ int32_t ker_index; /**< Kernel index */
+ int32_t bias_index; /**< Bias index */
+
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
+
+ int32_t padding_left_index; /**< Left padding index */
+ int32_t padding_right_index; /**< Right padding index */
+ int32_t padding_top_index; /**< Top padding index */
+ int32_t padding_bottom_index; /**< Bottom padding index */
+
+ int32_t activation_index; /**< Activation index */
+
+ /**
+ * @brief Construct a new Param object for Conv2D(explicit) as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Conv2D(explicit) with params
+ * @param [in] inputCount The number of inputs
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of outputs
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Conv2D(explicit)
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for conv2D(explicit) with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for conv2D(explicit)
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for conv2D(explicit)
+ * @return Parameters of conv2D(explicit)
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for conv2D(explicit)
+ * @param [in] v Node visitor for invoking visit function of conv2D(explicit)
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct of Conv2D(implicit) operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+ int32_t ker_index; /**< Kernel index */
+ int32_t bias_index; /**< Bias index */
+
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
+
+ int32_t padding_index; /**< Padding index */
+ int32_t activation_index; /**< Activation index */
+
+ /**
+ * @brief Construct a new Param object for Conv2D(implicit) as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Conv2D(implicit) with params
+ * @param [in] inputCount The number of inputs
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of outputs
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Conv2D(implicit)
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for conv2D(implicit) with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for conv2D(implicit)
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for conv2D(implicit)
+ * @return Parameters of conv2D(implicit)
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for conv2D(implicit)
+ * @param [in] v Node visitor for invoking visit function of conv2D(implicit)
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Implicit
+} // namespace Conv2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_CONV_2D_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.cc b/runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.cc
new file mode 100644
index 000000000..db164a148
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/DepthToSpace.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthToSpace
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace DepthToSpace
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthToSpace
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+}
+
+} // namespace DepthToSpace
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.h b/runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.h
new file mode 100644
index 000000000..dd4c5c914
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/DepthToSpace.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_DEPTHTOSPACE_H__
+#define __INTERNAL_OP_DEPTHTOSPACE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthToSpace
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+ int32_t block_size_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace DepthToSpace
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_DEPTHTOSPACE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc b/runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
new file mode 100644
index 000000000..f4d1ca3c5
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/DepthwiseConv2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthwiseConv2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace DepthwiseConv2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthwiseConv2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 11 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Kernel Tensor Index
+ // 2 -> Bias Tensor Index
+ // 3 -> Padding_left index
+ // 4 -> Padding_right index
+ // 5 -> Padding_top index
+ // 6 -> Padding_bottom index
+ // 7 -> Stride (width) Index
+ // 8 -> Stride (height) Index
+ // 9 -> Depthwise Multiplier
+ // 10 -> Activation Index
+ ifm_index = inputs[0];
+ ker_index = inputs[1];
+ bias_index = inputs[2];
+ padding_left_index = inputs[3];
+ padding_right_index = inputs[4];
+ padding_top_index = inputs[5];
+ padding_bottom_index = inputs[6];
+ hstride_index = inputs[7];
+ vstride_index = inputs[8];
+ multiplier_index = inputs[9];
+ activation_index = inputs[10];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 8 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Kernel Tensor Index
+ // 2 -> Bias Tensor Index
+ // 3 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 4 -> Stride (width) Index
+ // 5 -> Stride (height) Index
+ // 6 -> Depthwise Multiplier
+ // 7 -> Activation Index
+ ifm_index = inputs[0];
+ ker_index = inputs[1];
+ bias_index = inputs[2];
+ padding_index = inputs[3];
+ hstride_index = inputs[4];
+ vstride_index = inputs[5];
+ multiplier_index = inputs[6];
+ activation_index = inputs[7];
+}
+
+} // namespace Implicit
+} // namespace DepthwiseConv2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
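
What distinguishes these constructors from the Conv2D ones is the Depthwise Multiplier operand: each input channel gets `multiplier` filters of its own, so the output depth is C_in * multiplier and no output channel mixes input channels. A small sketch of that channel bookkeeping (illustrative only):

void map_depthwise_channels(int c_in, int multiplier)
{
  const int c_out = c_in * multiplier;
  for (int oc = 0; oc < c_out; ++oc)
  {
    const int ic = oc / multiplier; // the single input channel oc depends on
    const int m = oc % multiplier;  // which of that channel's filters is used
    (void)ic;
    (void)m; // a real kernel accumulates in[ic] * filter_m spatially here
  }
}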
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
new file mode 100644
index 000000000..01a9e48be
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/DepthwiseConv2D.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file DepthwiseConv2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::DepthwiseConv2D Param structs
+ * and internal::tflite::op::DepthwiseConv2D Node classes
+ */
+#ifndef __INTERNAL_OP_DEPTHWISE_CONV_2D_H__
+#define __INTERNAL_OP_DEPTHWISE_CONV_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace DepthwiseConv2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct to have indexes for explicit padding DepthwiseConv2D operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t ker_index; /**< Index of kernel */
+ int32_t bias_index; /**< Index of bias */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_left_index; /**< Index of padding left */
+ int32_t padding_right_index; /**< Index of padding right */
+ int32_t padding_top_index; /**< Index of padding top */
+ int32_t padding_bottom_index; /**< Index of padding bottom */
+
+ int32_t multiplier_index; /**< Index of multiplier */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an explicit padding DepthwiseConv2D operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct to have indexes for implicit padding DepthwiseConv2D operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t ker_index; /**< Index of kernel */
+ int32_t bias_index; /**< Index of bias */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_index; /**< Index of padding */
+ int32_t multiplier_index; /**< Index of multiplier */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an implicit padding DepthwiseConv2D operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Implicit
+} // namespace DepthwiseConv2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_DEPTHWISE_CONV_2D_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.cc
new file mode 100644
index 000000000..7062463a2
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Dequantize.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Dequantize
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Dequantize
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Dequantize
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ input_index = inputs[0];
+}
+
+} // namespace Dequantize
+} // namespace op
+} // namespace tflite
+} // namespace internal
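
Dequantize needs no operands beyond the two tensor indexes because the scale and zero point travel with the quantized input operand's metadata. The element-wise transform the op denotes is the standard affine dequantization (a reference, not this patch's kernel):

#include <cstddef>
#include <cstdint>
#include <vector>

// real = scale * (q - zero_point), per element.
std::vector<float> dequantize(const std::vector<uint8_t> &q, float scale,
                              int32_t zero_point)
{
  std::vector<float> out(q.size());
  for (std::size_t i = 0; i < q.size(); ++i)
    out[i] = scale * (static_cast<int32_t>(q[i]) - zero_point);
  return out;
}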
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.h b/runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.h
new file mode 100644
index 000000000..f19898e9e
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Dequantize.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Dequantize.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Dequantize::Param struct
+ * and internal::tflite::op::Dequantize::Node class
+ */
+#ifndef __INTERNAL_OP_DEQUANTIZE_H__
+#define __INTERNAL_OP_DEQUANTIZE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Dequantize
+{
+
+/**
+ * @brief Struct to have indexes for Dequantize operation parameter
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output feature map */
+
+ int32_t input_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent a Dequantize operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Dequantize
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_DEQUANTIZE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Div.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Div.cc
new file mode 100644
index 000000000..649407eab
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Div.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Div.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Div
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Div
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Div
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ // 2 -> Activation Index
+ lhs_index = inputs[0];
+ rhs_index = inputs[1];
+ activation_index = inputs[2];
+}
+
+} // namespace Div
+} // namespace op
+} // namespace tflite
+} // namespace internal
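
The trailing Activation operand that Div shares with Add and the conv/pool ops in this patch is an NN API FuseCode, applied to the result before it is written out. The usual mapping, for reference (standard FuseCode values; not code from this patch):

#include <algorithm>

// 0 = NONE, 1 = RELU, 2 = RELU1, 3 = RELU6
float apply_fused_activation(float x, int fuse_code)
{
  switch (fuse_code)
  {
    case 1:
      return std::max(0.0f, x); // RELU
    case 2:
      return std::min(std::max(x, -1.0f), 1.0f); // RELU1
    case 3:
      return std::min(std::max(x, 0.0f), 6.0f); // RELU6
    default:
      return x; // NONE
  }
}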
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Div.h b/runtime/contrib/pure_arm_compute/src/internal/op/Div.h
new file mode 100644
index 000000000..d5fc09d19
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Div.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Div.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Div::Param struct
+ * and internal::tflite::op::Div::Node class
+ */
+#ifndef __INTERNAL_OP_DIV_H__
+#define __INTERNAL_OP_DIV_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Div
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t lhs_index; /**< Index of lhs */
+ int32_t rhs_index; /**< Index of rhs */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Div
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_DIV_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.cc b/runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.cc
new file mode 100644
index 000000000..a6eda3473
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/EmbeddingLookup.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace EmbeddingLookup
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace EmbeddingLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace EmbeddingLookup
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lookups Index
+ // 1 -> Values Index
+ lookups_index = inputs[0];
+ values_index = inputs[1];
+}
+
+} // namespace EmbeddingLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
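
EmbeddingLookup is a gather along the first dimension of the values tensor: output row i is values row lookups[i]. A scalar reference of those semantics (illustration only; `row_size` is the flattened size of one values row):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<float> embedding_lookup(const std::vector<int32_t> &lookups,
                                    const std::vector<float> &values,
                                    std::size_t row_size)
{
  std::vector<float> out(lookups.size() * row_size);
  for (std::size_t i = 0; i < lookups.size(); ++i)
    for (std::size_t k = 0; k < row_size; ++k)
      out[i * row_size + k] = values[lookups[i] * row_size + k];
  return out;
}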
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.h b/runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.h
new file mode 100644
index 000000000..17e8485f7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/EmbeddingLookup.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file EmbeddingLookup.h
+ * @brief This file contains accept function and params for EmbeddingLookup operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_EMBEDDING_LOOKUP_H__
+#define __INTERNAL_OP_EMBEDDING_LOOKUP_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace EmbeddingLookup
+{
+
+/**
+ * @brief Struct of EmbeddingLookup operation's param
+ */
+struct Param
+{
+ int32_t output_index; /**< Output index */
+
+ int32_t lookups_index; /**< Lookups index */
+ int32_t values_index; /**< Values index */
+
+ /**
+ * @brief Construct a new Param object for EmbeddingLookup as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for EmbeddingLookup with params
+ * @param [in] inputCount The number of inputs
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of outputs
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for EmbeddingLookup
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for EmbeddingLookup with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for EmbeddingLookup
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for EmbeddingLookup
+ * @return Parameters of EmbeddingLookup
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for EmbeddingLookup
+ * @param [in] v Node visitor for invoking visit function of EmbeddingLookup
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace EmbeddingLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_EMBEDDING_LOOKUP_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Equal.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Equal.cc
new file mode 100644
index 000000000..b9cccc6a9
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Equal.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Equal.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Equal.h b/runtime/contrib/pure_arm_compute/src/internal/op/Equal.h
new file mode 100644
index 000000000..78b9f846f
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Equal.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_EQUAL_H__
+#define __INTERNAL_OP_EQUAL_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Equal
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Equal
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_EQUAL_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Exp.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Exp.cc
new file mode 100644
index 000000000..6f1aa8f42
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Exp.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Exp.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Exp
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Exp
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Exp
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace Exp
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Exp.h b/runtime/contrib/pure_arm_compute/src/internal/op/Exp.h
new file mode 100644
index 000000000..ac7f244b7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Exp.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_EXP_H__
+#define __INTERNAL_OP_EXP_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Exp
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Exp
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_EXP_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Floor.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Floor.cc
new file mode 100644
index 000000000..c04f0c8ab
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Floor.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Floor.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Floor
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Floor
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Floor
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ input_index = inputs[0];
+}
+
+} // namespace Floor
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Floor.h b/runtime/contrib/pure_arm_compute/src/internal/op/Floor.h
new file mode 100644
index 000000000..5264ec10c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Floor.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Floor.h
+ * @brief This file contains accept function and params for Floor operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_FLOOR_H__
+#define __INTERNAL_OP_FLOOR_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Floor
+{
+
+/**
+ * @brief Struct of Floor operation's param
+ */
+struct Param
+{
+ int32_t output_index; /**< Output index */
+
+ int32_t input_index; /**< Input index */
+
+ /**
+ * @brief Construct a new Param object for Floor as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Floor with params
+ * @param [in] inputCount The number of inputs
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of outputs
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Floor
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for Floor with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for Floor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for Floor
+ * @return Parameters of Floor
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for Floor
+ * @param [in] v Node visitor for invoking visit function of Floor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Floor
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_FLOOR_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.cc b/runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.cc
new file mode 100644
index 000000000..491fa5918
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/FullyConnected.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace FullyConnected
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace FullyConnected
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace FullyConnected
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 4 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> A tensor, specifying the input.
+ // 1 -> A 2-D tensor, specifying the weights
+ // 2 -> A 1-D tensor, specifying the bias
+ // 3 -> An INT32 value, and has to be one of the FuseCode values
+ input_index = inputs[0];
+ weight_index = inputs[1];
+ bias_index = inputs[2];
+ activation_index = inputs[3];
+}
+
+} // namespace FullyConnected
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.h b/runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.h
new file mode 100644
index 000000000..434308435
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/FullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file FullyConnected.h
+ * @brief This file contains accept function and params for FullyConnected operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_FULLY_CONNECTED_H__
+#define __INTERNAL_OP_FULLY_CONNECTED_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace FullyConnected
+{
+
+/**
+ * @brief Struct of FullyConnected operation's param
+ */
+struct Param
+{
+ int32_t output_index; /**< Output index */
+
+ int32_t input_index; /**< Input index */
+ int32_t weight_index; /**< Weight index */
+ int32_t bias_index; /**< Bias index */
+ int32_t activation_index; /**< Activation index */
+
+ /**
+ * @brief Construct a new Param object for FullyConnected as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for FullyConnected with params
+ * @param [in] inputCount The number of inputs
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of outputs
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for FullyConnected
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for FullyConnected with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for FullyConnected
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for FullyConnected
+ * @return Parameters of FullyConnected
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for FullyConnected
+ * @param [in] v Node visitor for invoking visit function of FullyConnected
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace FullyConnected
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_FULLY_CONNECTED_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Gather.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Gather.cc
new file mode 100644
index 000000000..bc517d28c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Gather.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Gather.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Gather
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Gather
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Gather
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> indices Tensor Index
+ // 2 -> axis Index
+ ifm_index = inputs[0];
+ indices_index = inputs[1];
+ axis_index = inputs[2];
+}
+
+} // namespace Gather
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Gather.h b/runtime/contrib/pure_arm_compute/src/internal/op/Gather.h
new file mode 100644
index 000000000..d40794f99
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Gather.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Gather.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Gather operation
+ */
+
+#ifndef __INTERNAL_OP_GATHER_H__
+#define __INTERNAL_OP_GATHER_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Gather
+{
+
+/**
+ * @brief Struct holding operand indexes for the Gather operation
+ */
+struct Param
+{
+ int32_t ofm_index; //!< index for output feature map
+
+ int32_t ifm_index; //!< index for ifm tensor
+ int32_t indices_index; //!< index for indices tensor
+ int32_t axis_index; //!< index for axis
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Gather Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Gather Node object
+ * @param[in] param Parameter for Gather Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Gather node
+};
+
+} // namespace Gather
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_GATHER_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.cc b/runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.cc
new file mode 100644
index 000000000..7e04ecf82
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/HashtableLookup.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace HashtableLookup
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace HashtableLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace HashtableLookup
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 2);
+
+ output_index = outputs[0];
+ hits_index = outputs[1];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Lookups Index
+ // 1 -> Keys Index
+ // 2 -> Values Index
+ lookups_index = inputs[0];
+ keys_index = inputs[1];
+ values_index = inputs[2];
+}
+
+} // namespace HashtableLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.h b/runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.h
new file mode 100644
index 000000000..a5b43d1c7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/HashtableLookup.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file HashtableLookup.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::HashtableLookup::Param struct
+ * and internal::tflite::op::HashtableLookup::Node class
+ */
+#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace HashtableLookup
+{
+
+/**
+ * @brief Struct holding operand indexes for the HashtableLookup operation
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output */
+ int32_t hits_index; /**< Index of hits */
+
+ int32_t lookups_index; /**< Index of lookups */
+ int32_t values_index; /**< Index of values */
+ int32_t keys_index; /**< Index of keys */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent a HashtableLookup operation node
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace HashtableLookup
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.cc b/runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.cc
new file mode 100644
index 000000000..44a6ee63d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/L2Normalization.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Normalization
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace L2Normalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Normalization
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
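+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index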
+ ifm_index = inputs[0];
+}
+
+} // namespace L2Normalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.h b/runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.h
new file mode 100644
index 000000000..2e94fac11
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/L2Normalization.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file L2Normalization.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::L2Normalization::Param struct
+ * and internal::tflite::op::L2Normalization::Node class
+ */
+#ifndef __INTERNAL_OP_L2_NORMALIZATION_H__
+#define __INTERNAL_OP_L2_NORMALIZATION_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Normalization
+{
+
+/**
+ * @brief Struct holding operand indexes for the L2Normalization operation
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an L2Normalization operation node
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace L2Normalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_L2_NORMALIZATION_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.cc b/runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.cc
new file mode 100644
index 000000000..64041ab49
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/L2Pool2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 10 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_left_index = inputs[1];
+ padding_right_index = inputs[2];
+ padding_top_index = inputs[3];
+ padding_bottom_index = inputs[4];
+ hstride_index = inputs[5];
+ vstride_index = inputs[6];
+ kw_index = inputs[7];
+ kh_index = inputs[8];
+ activation_index = inputs[9];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 7 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_index = inputs[1];
+ hstride_index = inputs[2];
+ vstride_index = inputs[3];
+ kw_index = inputs[4];
+ kh_index = inputs[5];
+ activation_index = inputs[6];
+}
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.h b/runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.h
new file mode 100644
index 000000000..facb223c7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/L2Pool2D.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file L2Pool2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::L2Pool2D Param structs
+ * and internal::tflite::op::L2Pool2D Node classes
+ */
+#ifndef __INTERNAL_OP_L2_POOL_2D_H__
+#define __INTERNAL_OP_L2_POOL_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct holding operand indexes for the explicit-padding L2Pool2D operation
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_left_index; /**< Index of padding left */
+ int32_t padding_right_index; /**< Index of padding right */
+ int32_t padding_top_index; /**< Index of padding top */
+ int32_t padding_bottom_index; /**< Index of padding bottom */
+
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an explicit-padding L2Pool2D operation node
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct holding operand indexes for the implicit-padding L2Pool2D operation
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+
+ int32_t kw_index; /**< Index of kernel width */
+ int32_t kh_index; /**< Index of kernel height */
+
+ int32_t hstride_index; /**< Index of horizontal stride */
+ int32_t vstride_index; /**< Index of vertical stride */
+
+ int32_t padding_index; /**< Index of padding */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an implicit-padding L2Pool2D operation node
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_L2_POOL_2D_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc b/runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
new file mode 100644
index 000000000..b7419d923
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LocalResponseNormalization.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LocalResponseNormalization
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LocalResponseNormalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LocalResponseNormalization
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 5 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
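+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Radius Index
+ // 2 -> Bias Index
+ // 3 -> Alpha Index
+ // 4 -> Beta Index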
+ ifm_index = inputs[0];
+ radius_index = inputs[1];
+ bias_index = inputs[2];
+ alpha_index = inputs[3];
+ beta_index = inputs[4];
+}
+
+} // namespace LocalResponseNormalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.h b/runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.h
new file mode 100644
index 000000000..29e0699ad
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LocalResponseNormalization.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__
+#define __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LocalResponseNormalization
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+ int32_t radius_index;
+ int32_t bias_index;
+ int32_t alpha_index;
+ int32_t beta_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LocalResponseNormalization
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.cc b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.cc
new file mode 100644
index 000000000..5b7da4d3b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LogicalAnd.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.h b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.h
new file mode 100644
index 000000000..2f53f756d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalAnd.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOGICAL_AND_H__
+#define __INTERNAL_OP_LOGICAL_AND_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalAnd
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LogicalAnd
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGICAL_AND_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.cc b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.cc
new file mode 100644
index 000000000..4cb6a8e2a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LogicalNot.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
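+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index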
+ input_index = inputs[0];
+}
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.h b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.h
new file mode 100644
index 000000000..9593deafe
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalNot.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOGICAL_NOT_H__
+#define __INTERNAL_OP_LOGICAL_NOT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalNot
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LogicalNot
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGICAL_NOT_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.cc b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.cc
new file mode 100644
index 000000000..8295f6f0b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/LogicalOr.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.h b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.h
new file mode 100644
index 000000000..6487fa720
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/LogicalOr.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_LOGICAL_OR_H__
+#define __INTERNAL_OP_LOGICAL_OR_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LogicalOr
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LogicalOr
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGICAL_OR_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Logistic.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Logistic.cc
new file mode 100644
index 000000000..93ecd043c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Logistic.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Logistic.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Logistic
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Logistic
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Logistic
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace Logistic
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Logistic.h b/runtime/contrib/pure_arm_compute/src/internal/op/Logistic.h
new file mode 100644
index 000000000..a42fdc0d4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Logistic.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Logistic.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Logistic::Param struct
+ * and internal::tflite::op::Logistic::Node class
+ */
+#ifndef __INTERNAL_OP_LOGISTIC_H__
+#define __INTERNAL_OP_LOGISTIC_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Logistic
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Logistic
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LOGISTIC_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Lstm.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Lstm.cc
new file mode 100644
index 000000000..3f5e9a490
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Lstm.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Lstm.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LSTM
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace LSTM
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LSTM
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 23 && outputCount == 4);
+
+ scratch_buffer_index = outputs[0];
+ output_state_out_index = outputs[1];
+ cell_state_out_index = outputs[2];
+ output_index = outputs[3];
+
+ input_index = inputs[0];
+ input_to_input_weights_index = inputs[1];
+ input_to_forget_weights_index = inputs[2];
+ input_to_cell_weights_index = inputs[3];
+ input_to_output_weights_index = inputs[4];
+ recurrent_to_input_weights_index = inputs[5];
+ recurrent_to_forget_weights_index = inputs[6];
+ recurrent_to_cell_weights_index = inputs[7];
+ recurrent_to_output_weights_index = inputs[8];
+ cell_to_input_weights_index = inputs[9];
+ cell_to_forget_weights_index = inputs[10];
+ cell_to_output_weights_index = inputs[11];
+ input_gate_bias_index = inputs[12];
+ forget_gate_bias_index = inputs[13];
+ cell_bias_index = inputs[14];
+ output_gate_bias_index = inputs[15];
+ projection_weights_index = inputs[16];
+ projection_bias_index = inputs[17];
+ output_state_in_index = inputs[18];
+ cell_state_in_index = inputs[19];
+ activation_index = inputs[20];
+ cell_threshold_index = inputs[21];
+ projection_threshold_index = inputs[22];
+}
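+
+// Illustrative note: the 23-input/4-output layout recorded above mirrors
+// the Android NN API LSTM operand ordering, so a caller holding that
+// operation's operand arrays can forward them unchanged. Sketch (the 'op'
+// object is hypothetical):
+//
+//   Param param(op.inputs.size(), op.inputs.data(),
+//               op.outputs.size(), op.outputs.data());
+//   // param.input_to_forget_weights_index == op.inputs[2], and so on.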
+
+} // namespace LSTM
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Lstm.h b/runtime/contrib/pure_arm_compute/src/internal/op/Lstm.h
new file mode 100644
index 000000000..f51f0402a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Lstm.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Lstm.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::LSTM::Param struct
+ * and internal::tflite::op::LSTM::Node class
+ */
+#ifndef __INTERNAL_OP_LSTM_H__
+#define __INTERNAL_OP_LSTM_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace LSTM
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t scratch_buffer_index; /**< Index of scratch buffer */
+ int32_t output_state_out_index; /**< Index of output state out */
+ int32_t cell_state_out_index; /**< Index of cell state out */
+ int32_t output_index; /**< Index of output */
+
+ int32_t input_index; /**< Index of input */
+ int32_t input_to_input_weights_index; /**< Index of input to input weights */
+ int32_t input_to_forget_weights_index; /**< Index of input to forget weights */
+ int32_t input_to_cell_weights_index; /**< Index of input to cell weights */
+ int32_t input_to_output_weights_index; /**< Index of input to output weights */
+ int32_t recurrent_to_input_weights_index; /**< Index of recurrent to input weights */
+ int32_t recurrent_to_forget_weights_index; /**< Index of recurrent to forget weights */
+ int32_t recurrent_to_cell_weights_index; /**< Index of recurrent to cell weights */
+ int32_t recurrent_to_output_weights_index; /**< Index of recurrent to output weights */
+ int32_t cell_to_input_weights_index; /**< Index of cell to input weights */
+ int32_t cell_to_forget_weights_index; /**< Index of cell to forget weights */
+ int32_t cell_to_output_weights_index; /**< Index of cell to output weights */
+ int32_t input_gate_bias_index; /**< Index of input gate bias */
+ int32_t forget_gate_bias_index; /**< Index of forget gate bias */
+ int32_t cell_bias_index; /**< Index of cell bias */
+ int32_t output_gate_bias_index; /**< Index of output gate bias */
+ int32_t projection_weights_index; /**< Index of projection weights */
+ int32_t projection_bias_index; /**< Index of projection bias */
+ int32_t output_state_in_index; /**< Index of output state in */
+ int32_t cell_state_in_index; /**< Index of cell state in */
+ int32_t activation_index; /**< Index of activation */
+ int32_t cell_threshold_index; /**< Index of cell threshold */
+ int32_t projection_threshold_index; /**< Index of projection threshold */
+
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace LSTM
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_LSTM_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.cc b/runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.cc
new file mode 100644
index 000000000..0c80f1f5c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/MaxPool2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace MaxPool2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace MaxPool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace MaxPool2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 10 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_left_index = inputs[1];
+ padding_right_index = inputs[2];
+ padding_top_index = inputs[3];
+ padding_bottom_index = inputs[4];
+ hstride_index = inputs[5];
+ vstride_index = inputs[6];
+ kw_index = inputs[7];
+ kh_index = inputs[8];
+ activation_index = inputs[9];
+}
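+
+// Worked example (illustrative, assumed values): a 2x2 max pooling with
+// stride 2 and no padding would arrive as ten operand indexes whose
+// referenced scalars hold padding (l, r, t, b) = (0, 0, 0, 0),
+// stride (h, v) = (2, 2), filter (w, h) = (2, 2) and a NONE fuse code;
+// this constructor only records where those scalars live.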
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 7 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Tensor Index
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
+ ifm_index = inputs[0];
+ padding_index = inputs[1];
+ hstride_index = inputs[2];
+ vstride_index = inputs[3];
+ kw_index = inputs[4];
+ kh_index = inputs[5];
+ activation_index = inputs[6];
+}
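+
+// Note (assumption based on the NN API padding scheme): with implicit
+// padding the single padding_index operand holds a padding code (SAME or
+// VALID), and the concrete pad amounts are derived later from the input,
+// filter and stride sizes when the kernel is configured.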
+
+} // namespace Implicit
+} // namespace MaxPool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.h b/runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.h
new file mode 100644
index 000000000..329ccecb7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/MaxPool2D.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file MaxPool2D.h
+ * @brief This file contains accept function and params for MaxPool2D operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_MAX_POOL_2D_H__
+#define __INTERNAL_OP_MAX_POOL_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace MaxPool2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct of MaxPool2D(Explicit) operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+
+ int32_t kw_index; /**< Kernel width index */
+ int32_t kh_index; /**< Kernel height index */
+
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
+
+ int32_t padding_left_index; /**< Left padding index */
+ int32_t padding_right_index; /**< Right padding index */
+ int32_t padding_top_index; /**< Top padding index */
+ int32_t padding_bottom_index; /**< Bottom padding index */
+
+ int32_t activation_index; /**< Activation index */
+
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Explicit) as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Explicit) with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for MaxPool2D(Explicit)
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for MaxPool2D(Explicit) with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for MaxPool2D(Explicit)
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for MaxPool2D(Explicit)
+ * @return Parameters of MaxPool2D(Explicit)
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for MaxPool2D(Explicit)
+ * @param [in] v Node visitor for invoking visit function of MaxPool2D(Explicit)
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct of MaxPool2D(Implicit) operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+
+ int32_t kw_index; /**< Kernel width index */
+ int32_t kh_index; /**< Kernel height index */
+
+ int32_t hstride_index; /**< Horizontal stride index */
+ int32_t vstride_index; /**< Vertical stride index */
+
+ int32_t padding_index; /**< Padding index */
+ int32_t activation_index; /**< Activation index */
+
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Implicit) as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for MaxPool2D(Implicit) with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for MaxPool2D(Implicit)
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for MaxPool2D(Implicit) with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for MaxPool2D(Implicit)
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for MaxPool2D(Implicit)
+ * @return Parameters of MaxPool2D(Implicit)
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for MaxPool2D(Implicit)
+ * @param [in] v Node visitor for invoking visit function of MaxPool2D(Implicit)
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Implicit
+} // namespace MaxPool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_MAX_POOL_2D_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Mean.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Mean.cc
new file mode 100644
index 000000000..222a3ee4a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Mean.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Mean.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Mean
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Mean
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Mean
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> ifm Tensor Index
+ // 1 -> axis Tensor Index
+ // 2 -> keep_dims Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+ keep_dims_index = inputs[2];
+}
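+
+// Worked example (illustrative): for a 1x2x2x1 input with axis = {1, 2}
+// and keep_dims = true, Mean yields a 1x1x1x1 output holding the average
+// of the four spatial values; with keep_dims = false the reduced
+// dimensions are dropped and the output shape becomes {1, 1}.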
+
+} // namespace Mean
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h b/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h
new file mode 100644
index 000000000..f8e7ed308
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Mean.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Mean.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Mean::Param struct
+ * and internal::tflite::op::Mean::Node class
+ */
+#ifndef __INTERNAL_OP_MEAN_H__
+#define __INTERNAL_OP_MEAN_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Mean
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t axis_index; /**< Index of axis */
+ int32_t keep_dims_index; /**< Index of keep dims */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Mean
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_MEAN_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Mul.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Mul.cc
new file mode 100644
index 000000000..2a2ae00ed
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Mul.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Mul.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Mul
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Mul
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Mul
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ // 2 -> Activation Index
+ lhs_index = inputs[0];
+ rhs_index = inputs[1];
+ activation_index = inputs[2];
+}
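+
+// Note (assumption based on the NN API fused-activation convention):
+// activation_index refers to a scalar FuseCode operand (NONE, RELU,
+// RELU1 or RELU6) that is fused into the multiplication when the
+// backend kernel is configured.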
+
+} // namespace Mul
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Mul.h b/runtime/contrib/pure_arm_compute/src/internal/op/Mul.h
new file mode 100644
index 000000000..9710dd057
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Mul.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Mul.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Mul::Param struct
+ * and internal::tflite::op::Mul::Node class
+ */
+#ifndef __INTERNAL_OP_MUL_H__
+#define __INTERNAL_OP_MUL_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Mul
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t lhs_index; /**< Index of lhs */
+ int32_t rhs_index; /**< Index of rhs */
+ int32_t activation_index; /**< Index of activation */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Mul
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_MUL_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Neg.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Neg.cc
new file mode 100644
index 000000000..72fecf484
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Neg.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Neg.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Neg
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Neg
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Neg
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace Neg
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Neg.h b/runtime/contrib/pure_arm_compute/src/internal/op/Neg.h
new file mode 100644
index 000000000..77507df3d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Neg.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_NEG_H__
+#define __INTERNAL_OP_NEG_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Neg
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Neg
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_NEG_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Node.h b/runtime/contrib/pure_arm_compute/src/internal/op/Node.h
new file mode 100644
index 000000000..be1cbdb5b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Node.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Node.h
+ * @brief This file contains struct of Node and NodeVisitor
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_NODE_H__
+#define __INTERNAL_OP_NODE_H__
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+
+/**
+ * @brief Forward declaration of operation NodeVisitor
+ */
+struct NodeVisitor;
+
+/**
+ * @brief Struct of operation Node
+ */
+struct Node
+{
+ /**
+ * @brief Destroy the Node object for operation
+ */
+ virtual ~Node() = default;
+
+ /**
+ * @brief Function for accepting node for operation
+ * @param [in] v Node visitor for invoking visit function of operation
+ * @return N/A
+ */
+ virtual void accept(NodeVisitor &&) const = 0;
+};
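+
+// A minimal double-dispatch sketch (illustrative; 'DumpVisitor' is a
+// hypothetical NodeVisitor implementation). Each concrete operation
+// implements accept() as 'v.visit(*this)', so the call resolves to the
+// visit() overload matching the node's dynamic type:
+//
+//   const internal::tflite::op::Add::Node &node = ...;
+//   node.accept(DumpVisitor{});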
+
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_NODE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtime/contrib/pure_arm_compute/src/internal/op/NodeVisitor.h
new file mode 100644
index 000000000..0c1a4001d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/NodeVisitor.h
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file NodeVisitor.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines NodeVisitor
+ */
+
+#ifndef __INTERNAL_OP_NODE_VISITOR_H__
+#define __INTERNAL_OP_NODE_VISITOR_H__
+
+#include "internal/op/Add.h"
+#include "internal/op/Sub.h"
+#include "internal/op/Mul.h"
+#include "internal/op/Div.h"
+#include "internal/op/Conv2D.h"
+#include "internal/op/DepthwiseConv2D.h"
+#include "internal/op/Dequantize.h"
+#include "internal/op/MaxPool2D.h"
+#include "internal/op/AvgPool2D.h"
+#include "internal/op/ArgMax.h"
+#include "internal/op/Concat.h"
+#include "internal/op/Reshape.h"
+#include "internal/op/ResizeBilinear.h"
+#include "internal/op/StridedSlice.h"
+#include "internal/op/FullyConnected.h"
+#include "internal/op/Softmax.h"
+#include "internal/op/ReduceMax.h"
+#include "internal/op/ReduceMin.h"
+#include "internal/op/Cast.h"
+#include "internal/op/TopKV2.h"
+#include "internal/op/Gather.h"
+#include "internal/op/PReLU.h"
+#include "internal/op/ReLU.h"
+#include "internal/op/ReLU1.h"
+#include "internal/op/ReLU6.h"
+#include "internal/op/Tanh.h"
+#include "internal/op/Squeeze.h"
+#include "internal/op/Logistic.h"
+#include "internal/op/Mean.h"
+#include "internal/op/Rnn.h"
+#include "internal/op/Transpose.h"
+#include "internal/op/Lstm.h"
+#include "internal/op/Floor.h"
+#include "internal/op/Split.h"
+#include "internal/op/RSQRT.h"
+#include "internal/op/SQRT.h"
+#include "internal/op/Pad.h"
+#include "internal/op/SpaceToDepth.h"
+#include "internal/op/SpaceToBatchND.h"
+#include "internal/op/L2Pool2D.h"
+#include "internal/op/EmbeddingLookup.h"
+#include "internal/op/HashtableLookup.h"
+#include "internal/op/L2Normalization.h"
+#include "internal/op/SquaredDifference.h"
+#include "internal/op/LocalResponseNormalization.h"
+#include "internal/op/DepthToSpace.h"
+#include "internal/op/Unpack.h"
+#include "internal/op/Neg.h"
+#include "internal/op/Exp.h"
+#include "internal/op/ReduceSum.h"
+#include "internal/op/Equal.h"
+#include "internal/op/BatchToSpaceNd.h"
+#include "internal/op/TransposeConv.h"
+#include "internal/op/Pack.h"
+#include "internal/op/Abs.h"
+#include "internal/op/NotEqual.h"
+#include "internal/op/LogicalAnd.h"
+#include "internal/op/LogicalNot.h"
+#include "internal/op/LogicalOr.h"
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+
+/**
+ * @brief Struct to define visitor for operation Nodes
+ */
+struct NodeVisitor
+{
+ /**
+ * @brief Destruct NodeVisitor object with default
+ */
+ virtual ~NodeVisitor() = default;
+
+ /**
+ * @brief Visit an Add node
+ * @param[in] node Add node to visit
+ * @return N/A
+ */
+ virtual void visit(const Add::Node &) = 0;
+ /**
+ * @brief Visit a Sub node
+ * @param[in] node Sub node to visit
+ * @return N/A
+ */
+ virtual void visit(const Sub::Node &) = 0;
+ /**
+ * @brief Visit a Mul node
+ * @param[in] node Mul node to visit
+ * @return N/A
+ */
+ virtual void visit(const Mul::Node &) = 0;
+ /**
+ * @brief Visit a Div node
+ * @param[in] node Div node to visit
+ * @return N/A
+ */
+ virtual void visit(const Div::Node &) = 0;
+ /**
+ * @brief Visit a Conv2D node with implicit padding
+ * @param[in] node Conv2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const Conv2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit a Conv2D node with explicit padding
+ * @param[in] node Conv2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const Conv2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit a DepthwiseConv2D node with implicit padding
+ * @param[in] node DepthwiseConv2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const DepthwiseConv2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit a DepthwiseConv2D node with explicit padding
+ * @param[in] node DepthwiseConv2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const DepthwiseConv2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit a Dequantize node
+ * @param[in] node Dequantize node to visit
+ * @return N/A
+ */
+ virtual void visit(const Dequantize::Node &) = 0;
+ /**
+ * @brief Visit a MaxPool2D node with implicit padding
+ * @param[in] node MaxPool2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const MaxPool2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit a MaxPool2D node with explicit padding
+ * @param[in] node MaxPool2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const MaxPool2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit an AvgPool2D node with implicit padding
+ * @param[in] node AvgPool2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const AvgPool2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit an AvgPool2D node with explicit padding
+ * @param[in] node AvgPool2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const AvgPool2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit a Concat node
+ * @param[in] node Concat node to visit
+ * @return N/A
+ */
+ virtual void visit(const Concat::Node &) = 0;
+ /**
+ * @brief Visit an ArgMax node
+ * @param[in] node ArgMax node to visit
+ * @return N/A
+ */
+ virtual void visit(const ArgMax::Node &) = 0;
+ /**
+ * @brief Visit a Reshape node
+ * @param[in] node Reshape node to visit
+ * @return N/A
+ */
+ virtual void visit(const Reshape::Node &) = 0;
+ /**
+ * @brief Visit a ResizeBilinear node
+ * @param[in] node ResizeBilinear node to visit
+ * @return N/A
+ */
+ virtual void visit(const ResizeBilinear::Node &) = 0;
+ /**
+ * @brief Visit a StridedSlice node
+ * @param[in] node StridedSlice node to visit
+ * @return N/A
+ */
+ virtual void visit(const StridedSlice::Node &) = 0;
+ /**
+ * @brief Visit a FullyConnected node
+ * @param[in] node FullyConnected node to visit
+ * @return N/A
+ */
+ virtual void visit(const FullyConnected::Node &) = 0;
+ /**
+ * @brief Visit a Softmax node
+ * @param[in] node Softmax node to visit
+ * @return N/A
+ */
+ virtual void visit(const Softmax::Node &) = 0;
+ /**
+ * @brief Visit a ReduceMax node
+ * @param[in] node ReduceMax node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReduceMax::Node &) = 0;
+ /**
+ * @brief Visit a ReduceMin node
+ * @param[in] node ReduceMin node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReduceMin::Node &) = 0;
+ /**
+ * @brief Visit a Cast node
+ * @param[in] node Cast node to visit
+ * @return N/A
+ */
+ virtual void visit(const Cast::Node &) = 0;
+ /**
+ * @brief Visit a TopKV2 node
+ * @param[in] node TopKV2 node to visit
+ * @return N/A
+ */
+ virtual void visit(const TopKV2::Node &) = 0;
+ /**
+ * @brief Visit a Gather node
+ * @param[in] node Gather node to visit
+ * @return N/A
+ */
+ virtual void visit(const Gather::Node &) = 0;
+ /**
+ * @brief Visit a PReLU node
+ * @param[in] node PReLU node to visit
+ * @return N/A
+ */
+ virtual void visit(const PReLU::Node &) = 0;
+ /**
+ * @brief Visit a ReLU node
+ * @param[in] node ReLU node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReLU::Node &) = 0;
+ /**
+ * @brief Visit a ReLU1 node
+ * @param[in] node ReLU1 node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReLU1::Node &) = 0;
+ /**
+ * @brief Visit a ReLU6 node
+ * @param[in] node ReLU6 node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReLU6::Node &) = 0;
+ /**
+ * @brief Visit a Tanh node
+ * @param[in] node Tanh node to visit
+ * @return N/A
+ */
+ virtual void visit(const Tanh::Node &) = 0;
+ /**
+ * @brief Visit a Squeeze node
+ * @param[in] node Squeeze node to visit
+ * @return N/A
+ */
+ virtual void visit(const Squeeze::Node &) = 0;
+ /**
+ * @brief Visit a Logistic node
+ * @param[in] node Logistic node to visit
+ * @return N/A
+ */
+ virtual void visit(const Logistic::Node &) = 0;
+ /**
+ * @brief Visit a Mean node
+ * @param[in] node Mean node to visit
+ * @return N/A
+ */
+ virtual void visit(const Mean::Node &) = 0;
+ /**
+ * @brief Visit an RNN node
+ * @param[in] node RNN node to visit
+ * @return N/A
+ */
+ virtual void visit(const RNN::Node &) = 0;
+ /**
+ * @brief Visit a Transpose node
+ * @param[in] node Transpose node to visit
+ * @return N/A
+ */
+ virtual void visit(const Transpose::Node &) = 0;
+ /**
+ * @brief Visit an LSTM node
+ * @param[in] node LSTM node to visit
+ * @return N/A
+ */
+ virtual void visit(const LSTM::Node &) = 0;
+ /**
+ * @brief Visit a Floor node
+ * @param[in] node Floor node to visit
+ * @return N/A
+ */
+ virtual void visit(const Floor::Node &) = 0;
+ /**
+ * @brief Visit a Split node
+ * @param[in] node Split node to visit
+ * @return N/A
+ */
+ virtual void visit(const Split::Node &) = 0;
+ /**
+ * @brief Visit an RSQRT node
+ * @param[in] node RSQRT node to visit
+ * @return N/A
+ */
+ virtual void visit(const RSQRT::Node &) = 0;
+ /**
+ * @brief Visit an SQRT node
+ * @param[in] node SQRT node to visit
+ * @return N/A
+ */
+ virtual void visit(const SQRT::Node &) = 0;
+ /**
+ * @brief Visit a Pad node
+ * @param[in] node Pad node to visit
+ * @return N/A
+ */
+ virtual void visit(const Pad::Node &) = 0;
+ /**
+ * @brief Visit a SpaceToDepth node
+ * @param[in] node SpaceToDepth node to visit
+ * @return N/A
+ */
+ virtual void visit(const SpaceToDepth::Node &) = 0;
+ /**
+ * @brief Visit a SpaceToBatchND node
+ * @param[in] node SpaceToBatchND node to visit
+ * @return N/A
+ */
+ virtual void visit(const SpaceToBatchND::Node &) = 0;
+ /**
+ * @brief Visit an L2Pool2D node with implicit padding
+ * @param[in] node L2Pool2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const L2Pool2D::Implicit::Node &) = 0;
+ /**
+ * @brief Visit an L2Pool2D node with explicit padding
+ * @param[in] node L2Pool2D node to visit
+ * @return N/A
+ */
+ virtual void visit(const L2Pool2D::Explicit::Node &) = 0;
+ /**
+ * @brief Visit an EmbeddingLookup node
+ * @param[in] node EmbeddingLookup node to visit
+ * @return N/A
+ */
+ virtual void visit(const EmbeddingLookup::Node &) = 0;
+ /**
+ * @brief Visit a HashtableLookup node
+ * @param[in] node HashtableLookup node to visit
+ * @return N/A
+ */
+ virtual void visit(const HashtableLookup::Node &) = 0;
+ /**
+ * @brief Visit an L2Normalization node
+ * @param[in] node L2Normalization node to visit
+ * @return N/A
+ */
+ virtual void visit(const L2Normalization::Node &) = 0;
+ /**
+ * @brief Visit a SquaredDifference node
+ * @param[in] node SquaredDifference node to visit
+ * @return N/A
+ */
+ virtual void visit(const SquaredDifference::Node &) = 0;
+ /**
+ * @brief Visit a LocalResponseNormalization node
+ * @param[in] node LocalResponseNormalization node to visit
+ * @return N/A
+ */
+ virtual void visit(const LocalResponseNormalization::Node &) = 0;
+ /**
+ * @brief Visit a DepthToSpace node
+ * @param[in] node DepthToSpace node to visit
+ * @return N/A
+ */
+ virtual void visit(const DepthToSpace::Node &) = 0;
+ /**
+ * @brief Visit an Unpack node
+ * @param[in] node Unpack node to visit
+ * @return N/A
+ */
+ virtual void visit(const Unpack::Node &) = 0;
+ /**
+ * @brief Visit a Neg node
+ * @param[in] node Neg node to visit
+ * @return N/A
+ */
+ virtual void visit(const Neg::Node &) = 0;
+ /**
+ * @brief Visit an Exp node
+ * @param[in] node Exp node to visit
+ * @return N/A
+ */
+ virtual void visit(const Exp::Node &) = 0;
+ /**
+ * @brief Visit a ReduceSum node
+ * @param[in] node ReduceSum node to visit
+ * @return N/A
+ */
+ virtual void visit(const ReduceSum::Node &) = 0;
+ /**
+ * @brief Visit an Equal node
+ * @param[in] node Equal node to visit
+ * @return N/A
+ */
+ virtual void visit(const Equal::Node &) = 0;
+ /**
+ * @brief Visit a BatchToSpaceNd node
+ * @param[in] node BatchToSpaceNd node to visit
+ * @return N/A
+ */
+ virtual void visit(const BatchToSpaceNd::Node &) = 0;
+ /**
+ * @brief Visit a TransposeConv node
+ * @param[in] node TransposeConv node to visit
+ * @return N/A
+ */
+ virtual void visit(const TransposeConv::Node &) = 0;
+ /**
+ * @brief Visit a Pack node
+ * @param[in] node Pack node to visit
+ * @return N/A
+ */
+ virtual void visit(const Pack::Node &) = 0;
+ /**
+ * @brief Visit an Abs node
+ * @param[in] node Abs node to visit
+ * @return N/A
+ */
+ virtual void visit(const Abs::Node &) = 0;
+ /**
+ * @brief Visit a NotEqual node
+ * @param[in] node NotEqual node to visit
+ * @return N/A
+ */
+ virtual void visit(const NotEqual::Node &) = 0;
+ /**
+ * @brief Visit a LogicalAnd node
+ * @param[in] node LogicalAnd node to visit
+ * @return N/A
+ */
+ virtual void visit(const LogicalAnd::Node &) = 0;
+ /**
+ * @brief Visit a LogicalNot node
+ * @param[in] node LogicalNot node to visit
+ * @return N/A
+ */
+ virtual void visit(const LogicalNot::Node &) = 0;
+ /**
+ * @brief Visit a LogicalOr node
+ * @param[in] node LogicalOr node to visit
+ * @return N/A
+ */
+ virtual void visit(const LogicalOr::Node &) = 0;
+};
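+
+// Sketch of a concrete visitor (illustrative): every visit() overload is
+// pure virtual, so an implementation must override all of them, e.g.
+//
+//   struct CountingVisitor final : public NodeVisitor
+//   {
+//     void visit(const Add::Node &) override { ++_count; }
+//     // ... one override per operation declared above ...
+//     uint32_t _count = 0;
+//   };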
+
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_NODE_VISITOR_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.cc b/runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.cc
new file mode 100644
index 000000000..2906e214b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/NotEqual.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input1 Tensor Index
+ // 1 -> input2 Tensor Index
+ input1_index = inputs[0];
+ input2_index = inputs[1];
+}
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.h b/runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.h
new file mode 100644
index 000000000..0d6130948
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/NotEqual.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_NOT_EQUAL_H__
+#define __INTERNAL_OP_NOT_EQUAL_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input1_index;
+ int32_t input2_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace NotEqual
+{
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace NotEqual
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_NOT_EQUAL_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/PReLU.cc b/runtime/contrib/pure_arm_compute/src/internal/op/PReLU.cc
new file mode 100644
index 000000000..25b06505b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/PReLU.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/PReLU.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace PReLU
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace PReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace PReLU
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> alpha Tensor Index
+ ifm_index = inputs[0];
+ alpha_index = inputs[1];
+}
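+
+// Note (illustrative): PReLU computes out = in where in >= 0 and
+// out = alpha * in otherwise, with alpha read from the tensor at
+// alpha_index and broadcast against the input shape.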
+
+} // namespace PReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/PReLU.h b/runtime/contrib/pure_arm_compute/src/internal/op/PReLU.h
new file mode 100644
index 000000000..ae754abb4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/PReLU.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file PReLU.h
+ * @brief This file contains accept function and params for PReLU operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_PRELU_H__
+#define __INTERNAL_OP_PRELU_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace PReLU
+{
+
+/**
+ * @brief Struct of PReLU operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+ int32_t alpha_index; /**< Alpha input index */
+
+ /**
+ * @brief Construct a new Param object for PReLU as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for PReLU with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for PReLU
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for PReLU with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for PReLU
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for PReLU
+ * @return Parameters of PReLU
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for PReLU
+ * @param [in] v Node visitor for invoking visit function of PReLU
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace PReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_PRELU_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Pack.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Pack.cc
new file mode 100644
index 000000000..73f89b840
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Pack.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Pack.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pack
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Pack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pack
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(outputCount == 1);
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 .. n - 3 -> Input Tensor Index
+  // n - 2 -> Input Tensor count (will be ignored)
+ // n - 1 -> Input Axis Index
+ ofm_index = outputs[0];
+ axis_index = inputs[inputCount - 1];
+  // The last input is the axis along which the tensors are packed.
+ for (uint32_t n = 0; n < inputCount - 2; ++n)
+ {
+ ifm_indexes.emplace_back(inputs[n]);
+ }
+}
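+
+// Illustrative example (hypothetical operand indices), following the layout
+// described above: packing three tensors, so the operand list carries the
+// three tensor indices, the tensor count (ignored), and the axis index:
+//
+//   const uint32_t ins[5] = {3, 4, 5, 10, 11}; // 10 = count, 11 = axis
+//   const uint32_t outs[1] = {7};
+//   Param p(5, ins, 1, outs);
+//   // p.ifm_indexes == {3, 4, 5}, p.axis_index == 11, p.ofm_index == 7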
+
+} // namespace Pack
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Pack.h b/runtime/contrib/pure_arm_compute/src/internal/op/Pack.h
new file mode 100644
index 000000000..c5de01bd8
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Pack.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_PACK_H__
+#define __INTERNAL_OP_PACK_H__
+
+#include "internal/op/Node.h"
+#include <vector>
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pack
+{
+
+struct Param
+{
+ int32_t ofm_index;
+  // There are N + 2 inputs: inputs 0 .. N - 1 are tensors of the same shape,
+  // input N is the tensor count (ignored), and input N + 1 is the axis index
+  // along which the tensors are stacked.
+ std::vector<int32_t> ifm_indexes;
+ int32_t axis_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Pack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_PACK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Pad.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Pad.cc
new file mode 100644
index 000000000..00938242b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Pad.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Pad.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pad
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Pad
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pad
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ // 1 -> paddings
+ ifm_index = inputs[0];
+ paddings_index = inputs[1];
+}
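+
+// Illustrative example (hypothetical operand indices):
+//
+//   const uint32_t ins[2] = {0, 1};  // 0 = input tensor, 1 = paddings tensor
+//   const uint32_t outs[1] = {2};
+//   Param p(2, ins, 1, outs);
+//   // p.ifm_index == 0, p.paddings_index == 1, p.ofm_index == 2
+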
+} // namespace Pad
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Pad.h b/runtime/contrib/pure_arm_compute/src/internal/op/Pad.h
new file mode 100644
index 000000000..68752a10e
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Pad.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Pad.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Pad node
+ */
+
+#ifndef __INTERNAL_OP_PAD_H__
+#define __INTERNAL_OP_PAD_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Pad
+{
+
+/**
+ * @brief Struct to manipulate parameter for Pad operation
+ */
+struct Param
+{
+ int32_t ifm_index; //!< index for input
+ int32_t paddings_index; //!< index for padding
+ int32_t ofm_index; //!< index for output
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+   * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Pad Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+   * @brief Construct a new Pad Node object
+   * @param param Parameter for Pad Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Pad node
+};
+
+} // namespace Pad
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_PAD_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.cc b/runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.cc
new file mode 100644
index 000000000..d79563d14
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/RSQRT.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace RSQRT
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace RSQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace RSQRT
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ input_index = inputs[0];
+}
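+
+// Illustrative example (hypothetical operand indices) of the single-input,
+// single-output layout that SQRT decodes in the same way:
+//
+//   const uint32_t ins[1] = {0};
+//   const uint32_t outs[1] = {1};
+//   Param p(1, ins, 1, outs);
+//   // p.input_index == 0, p.output_index == 1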
+
+} // namespace RSQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.h b/runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.h
new file mode 100644
index 000000000..e39d60241
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/RSQRT.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file RSQRT.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::RSQRT::Param struct
+ * and internal::tflite::op::RSQRT::Node class
+ */
+#ifndef __INTERNAL_OP_RSQRT_H__
+#define __INTERNAL_OP_RSQRT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace RSQRT
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output feature map */
+
+ int32_t input_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace RSQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RSQRT_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReLU.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU.cc
new file mode 100644
index 000000000..2f48372af
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReLU.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ ifm_index = inputs[0];
+}
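+
+// Illustrative example (hypothetical operand indices); ReLU1 and ReLU6 decode
+// the identical single-input layout into ifm_index/ofm_index:
+//
+//   const uint32_t ins[1] = {0};
+//   const uint32_t outs[1] = {1};
+//   Param p(1, ins, 1, outs);
+//   // p.ifm_index == 0, p.ofm_index == 1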
+
+} // namespace ReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReLU.h b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU.h
new file mode 100644
index 000000000..aaa39b523
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ReLU.h
+ * @brief This file contains accept function and params for ReLU operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_RELU_H__
+#define __INTERNAL_OP_RELU_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU
+{
+
+/**
+ * @brief Struct of ReLU operation's param
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+
+ /**
+ * @brief Construct a new Param object for ReLU as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for ReLU with params
+   * @param [in] inputCount The number of inputs
+   * @param [in] inputs Array containing inputs
+   * @param [in] outputCount The number of outputs
+   * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for ReLU
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for ReLU with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for ReLU
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for ReLU
+ * @return Parameters of ReLU
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for ReLU
+ * @param [in] v Node visitor for invoking visit function of ReLU
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReLU
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RELU_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.cc
new file mode 100644
index 000000000..1925ac404
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReLU1.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU1
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReLU1
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU1
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace ReLU1
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.h b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.h
new file mode 100644
index 000000000..330445af8
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU1.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ReLU1.h
+ * @brief This file contains accept function and params for ReLU1 operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_RELU1_H__
+#define __INTERNAL_OP_RELU1_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU1
+{
+
+/**
+ * @brief Struct of ReLU1 operation's param
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+
+ /**
+ * @brief Construct a new Param object for ReLU1 as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for ReLU1 with params
+   * @param [in] inputCount The number of inputs
+   * @param [in] inputs Array containing inputs
+   * @param [in] outputCount The number of outputs
+   * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for ReLU1
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for ReLU1 with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for ReLU1
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for ReLU1
+ * @return Parameters of ReLU1
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for ReLU1
+ * @param [in] v Node visitor for invoking visit function of ReLU1
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReLU1
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RELU1_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.cc
new file mode 100644
index 000000000..e94ddcf15
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReLU6.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU6
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReLU6
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU6
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace ReLU6
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.h b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.h
new file mode 100644
index 000000000..6fc2c24fe
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReLU6.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ReLU6.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ReLU6 class
+ */
+#ifndef __INTERNAL_OP_RELU6_H__
+#define __INTERNAL_OP_RELU6_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReLU6
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReLU6
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RELU6_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.cc
new file mode 100644
index 000000000..7a337eabf
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReduceMax.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMax
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReduceMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMax
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
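+
+// Illustrative example (hypothetical operand indices); ReduceMin and
+// ReduceSum decode the identical {input, axis} -> {output} layout:
+//
+//   const uint32_t ins[2] = {0, 1};  // 0 = input tensor, 1 = axis tensor
+//   const uint32_t outs[1] = {2};
+//   Param p(2, ins, 1, outs);
+//   // p.ifm_index == 0, p.axis_index == 1, p.ofm_index == 2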
+
+} // namespace ReduceMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.h b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.h
new file mode 100644
index 000000000..77d8bd869
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMax.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ReduceMax.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ReduceMax::Param struct
+ * and internal::tflite::op::ReduceMax::Node class
+ */
+#ifndef __INTERNAL_OP_REDUCEMAX_H__
+#define __INTERNAL_OP_REDUCEMAX_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMax
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t axis_index; /**< Index of axis */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReduceMax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_REDUCEMAX_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.cc
new file mode 100644
index 000000000..72b6079d4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReduceMin.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMin
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReduceMin
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMin
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
+
+} // namespace ReduceMin
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.h b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.h
new file mode 100644
index 000000000..5dd82ec43
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceMin.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ReduceMin.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ReduceMin::Param struct
+ * and internal::tflite::op::ReduceMin::Node class
+ */
+#ifndef __INTERNAL_OP_REDUCEMIN_H__
+#define __INTERNAL_OP_REDUCEMIN_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceMin
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t axis_index; /**< Index of axis */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReduceMin
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_REDUCEMIN_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.cc
new file mode 100644
index 000000000..4d83c1734
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ReduceSum.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceSum
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ReduceSum
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceSum
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+}
+
+} // namespace ReduceSum
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.h b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.h
new file mode 100644
index 000000000..9c661f63a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ReduceSum.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_REDUCESUM_H__
+#define __INTERNAL_OP_REDUCESUM_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ReduceSum
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t ifm_index;
+ int32_t axis_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ReduceSum
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_REDUCESUM_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Reshape.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Reshape.cc
new file mode 100644
index 000000000..862ed30c7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Reshape.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Reshape.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Reshape
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Reshape
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Reshape
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> A tensor, specifying the tensor to be reshaped.
+ // 1 -> A 1-D tensor of type ANEURALNETWORKS_TENSOR_INT32, defining the shape of the output
+ // tensor
+ input_index = inputs[0];
+ shape_index = inputs[1];
+}
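+
+// Illustrative example (hypothetical operand indices):
+//
+//   const uint32_t ins[2] = {0, 1};  // 0 = tensor to reshape, 1 = shape tensor
+//   const uint32_t outs[1] = {2};
+//   Param p(2, ins, 1, outs);
+//   // p.input_index == 0, p.shape_index == 1, p.output_index == 2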
+
+} // namespace Reshape
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Reshape.h b/runtime/contrib/pure_arm_compute/src/internal/op/Reshape.h
new file mode 100644
index 000000000..7152eaece
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Reshape.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Reshape.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Reshape node
+ */
+
+#ifndef __INTERNAL_OP_RESHAPE_H__
+#define __INTERNAL_OP_RESHAPE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Reshape
+{
+
+/**
+ * @brief Struct to manipulate parameter for Reshape operation
+ */
+struct Param
+{
+ int32_t output_index; //!< index for output feature map
+
+ int32_t input_index; //!< index for input feature map
+ int32_t shape_index; //!< index for shape
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+   * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Reshape Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Reshape Node object
+ * @param param Parameter for Reshape Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Reshape node
+};
+
+} // namespace Reshape
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RESHAPE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.cc b/runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.cc
new file mode 100644
index 000000000..02ec20cb1
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/ResizeBilinear.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ResizeBilinear
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace ResizeBilinear
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ResizeBilinear
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> IFM Index
+ // 1 -> Height Index
+ // 2 -> Width Index
+ ifm_index = inputs[0];
+ height_index = inputs[1];
+ width_index = inputs[2];
+}
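+
+// Illustrative example (hypothetical operand indices):
+//
+//   const uint32_t ins[3] = {0, 1, 2};  // IFM, output height, output width
+//   const uint32_t outs[1] = {3};
+//   Param p(3, ins, 1, outs);
+//   // p.ifm_index == 0, p.height_index == 1, p.width_index == 2,
+//   // p.ofm_index == 3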
+
+} // namespace ResizeBilinear
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.h b/runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.h
new file mode 100644
index 000000000..f2eab4aaf
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/ResizeBilinear.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ResizeBilinear.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::ResizeBilinear::Param struct
+ * and internal::tflite::op::ResizeBilinear::Node class
+ */
+#ifndef __INTERNAL_OP_RESIZE_BILINEAR_H__
+#define __INTERNAL_OP_RESIZE_BILINEAR_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace ResizeBilinear
+{
+
+/**
+ * @brief Struct to have indexes for ResizeBilinear operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t ifm_index; /**< Index of input feature map */
+ int32_t height_index; /**< Index of height */
+ int32_t width_index; /**< Index of width */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an ResizeBilinear operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace ResizeBilinear
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RESIZE_BILINEAR_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Rnn.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Rnn.cc
new file mode 100644
index 000000000..3f19fca3b
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Rnn.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Rnn.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace RNN
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace RNN
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace RNN
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 6 && outputCount == 2);
+
+ output_index = outputs[0];
+ hidden_state_out_index = outputs[1];
+
+ input_index = inputs[0];
+ weights_index = inputs[1];
+ recurrent_weights_index = inputs[2];
+ bias_index = inputs[3];
+ hidden_state_in_index = inputs[4];
+ fused_activation_index = inputs[5];
+}
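+
+// Illustrative example (hypothetical operand indices), following the
+// six-input, two-output layout decoded above:
+//
+//   const uint32_t ins[6] = {0, 1, 2, 3, 4, 5};
+//   const uint32_t outs[2] = {6, 7};
+//   Param p(6, ins, 2, outs);
+//   // p.input_index == 0, p.weights_index == 1, p.recurrent_weights_index == 2,
+//   // p.bias_index == 3, p.hidden_state_in_index == 4,
+//   // p.fused_activation_index == 5, p.output_index == 6,
+//   // p.hidden_state_out_index == 7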
+
+} // namespace RNN
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Rnn.h b/runtime/contrib/pure_arm_compute/src/internal/op/Rnn.h
new file mode 100644
index 000000000..7b2a10843
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Rnn.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Rnn.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines RNN node
+ */
+
+#ifndef __INTERNAL_OP_RNN_H__
+#define __INTERNAL_OP_RNN_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace RNN
+{
+
+/**
+ * @brief Struct to manipulate parameter for RNN operation
+ */
+struct Param
+{
+  int32_t output_index;           //!< index for output
+ int32_t hidden_state_out_index; //!< index for hidden state output
+
+ int32_t input_index; //!< index for input
+ int32_t weights_index; //!< index for weight
+ int32_t recurrent_weights_index; //!< index for recurrent weights
+ int32_t bias_index; //!< index for bias
+ int32_t hidden_state_in_index; //!< index for hidden state input
+ int32_t fused_activation_index; //!< index for fused activation
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+   * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define RNN Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new RNN Node object
+ * @param param Parameter for RNN Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for RNN node
+};
+
+} // namespace RNN
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_RNN_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SQRT.cc b/runtime/contrib/pure_arm_compute/src/internal/op/SQRT.cc
new file mode 100644
index 000000000..70ce42e9c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SQRT.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/SQRT.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SQRT
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace SQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SQRT
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ // 0 -> input Tensor Index
+ input_index = inputs[0];
+}
+
+} // namespace SQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SQRT.h b/runtime/contrib/pure_arm_compute/src/internal/op/SQRT.h
new file mode 100644
index 000000000..85dfb97a7
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SQRT.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file SQRT.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::SQRT::Param struct
+ * and internal::tflite::op::SQRT::Node class
+ */
+#ifndef __INTERNAL_OP_SQRT_H__
+#define __INTERNAL_OP_SQRT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SQRT
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output feature map */
+
+ int32_t input_index; /**< Index of input feature map */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for SQRT
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace SQRT
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SQRT_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Softmax.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Softmax.cc
new file mode 100644
index 000000000..516a0fa04
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Softmax.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Softmax.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Softmax
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Softmax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Softmax
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> A 2-D or 4-D tensor, specifying the input tensor.
+ // 1 -> FLOAT32 value, specifying the positive scaling factor for the exponent, beta.
+ input_index = inputs[0];
+ scale_index = inputs[1];
+}
+
+} // namespace Softmax
+} // namespace op
+} // namespace tflite
+} // namespace internal
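
Note that `scale_index` stores the index of the operand holding beta, not beta's value; the backend resolves the actual float later. For reference, the semantics that operand controls can be sketched in a self-contained helper (illustrative only, not code from this patch):

#include <cmath>
#include <cstddef>
#include <vector>

// softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j)
std::vector<float> softmax(const std::vector<float> &x, float beta)
{
  std::vector<float> y(x.size());
  float sum = 0.0f;
  for (std::size_t i = 0; i < x.size(); ++i)
  {
    y[i] = std::exp(beta * x[i]); // beta scales the exponent
    sum += y[i];
  }
  for (auto &v : y)
    v /= sum;
  return y;
}
// A production kernel would subtract max(x) before exponentiating for
// numerical stability; omitted here for brevity.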
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Softmax.h b/runtime/contrib/pure_arm_compute/src/internal/op/Softmax.h
new file mode 100644
index 000000000..6e631af5f
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Softmax.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Softmax.h
+ * @brief This file contains accept function and params for Softmax operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_SOFTMAX_H__
+#define __INTERNAL_OP_SOFTMAX_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Softmax
+{
+
+/**
+ * @brief Struct of Softmax operation's param
+ */
+struct Param
+{
+ int32_t output_index; /**< Output index */
+
+ int32_t input_index; /**< Input index */
+ int32_t scale_index; /**< Scale index */
+
+ /**
+ * @brief Construct a new Param object for Softmax as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Softmax with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Softmax
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for Softmax with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for Softmax
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for Softmax
+ * @return Parameters of Softmax
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for Softmax
+ * @param [in] v Node visitor for invoking visit function of Softmax
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Softmax
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SOFTMAX_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.cc b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.cc
new file mode 100644
index 000000000..9ab026cf4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/SpaceToBatchND.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToBatchND
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace SpaceToBatchND
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToBatchND
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ // 2 -> Padding size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+ padding_size_index = inputs[2];
+}
+
+} // namespace SpaceToBatchND
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.h b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.h
new file mode 100644
index 000000000..650d068f4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToBatchND.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_SPACETOBATCHND_H__
+#define __INTERNAL_OP_SPACETOBATCHND_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToBatchND
+{
+
+struct Param
+{
+ int32_t output_index;
+
+ int32_t input_index;
+ int32_t block_size_index;
+ int32_t padding_size_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace SpaceToBatchND
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SPACETOBATCHND_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.cc b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.cc
new file mode 100644
index 000000000..2fb587be0
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/SpaceToDepth.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToDepth
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace SpaceToDepth
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToDepth
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ output_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ input_index = inputs[0];
+ block_size_index = inputs[1];
+}
+
+} // namespace SpaceToDepth
+} // namespace op
+} // namespace tflite
+} // namespace internal
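
As a quick reminder of what the block size operand controls, the shape effect of SpaceToDepth on NHWC tensors can be sketched as follows (illustrative helper, not part of the patch):

#include <array>
#include <cassert>

// Each block_size x block_size spatial tile is folded into channels.
std::array<int, 4> space_to_depth_shape(const std::array<int, 4> &nhwc, int block_size)
{
  assert(nhwc[1] % block_size == 0 && nhwc[2] % block_size == 0);
  return {nhwc[0], nhwc[1] / block_size, nhwc[2] / block_size,
          nhwc[3] * block_size * block_size};
}
// Example: {1, 4, 4, 1} with block_size 2 becomes {1, 2, 2, 4}.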
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.h b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.h
new file mode 100644
index 000000000..2e624006a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SpaceToDepth.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file SpaceToDepth.h
+ * @brief This file contains accept function and params for SpaceToDepth operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_SPACETODEPTH_H__
+#define __INTERNAL_OP_SPACETODEPTH_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SpaceToDepth
+{
+
+/**
+ * @brief Struct of SpaceToDepth operation's param
+ */
+struct Param
+{
+ int32_t output_index; /**< Output index */
+
+ int32_t input_index; /**< Input index */
+ int32_t block_size_index; /**< Block size index */
+
+ /**
+ * @brief Construct a new Param object for SpaceToDepth as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for SpaceToDepth with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for SpaceToDepth
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for SpaceToDepth with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for SpaceToDepth
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for SpaceToDepth
+ * @return Parameters of SpaceToDepth
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for SpaceToDepth
+ * @param [in] v Node visitor for invoking visit function of SpaceToDepth
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace SpaceToDepth
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SPACETODEPTH_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Split.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Split.cc
new file mode 100644
index 000000000..6457a106a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Split.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Split.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Split
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Split
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Split
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3);
+
+ // Each input should be interpreted as follows:
+ // 0 -> An n-D tensor, specifying the tensor to be split.
+ // 1 -> A 0-D int32 tensor, indicating the dimension along which to split.
+ // 2 -> A 0-D int32 tensor, indicating the number of outputs
+ //      (ignored on pacl because pacl doesn't support dynamic tensor shapes;
+ //      it can be used for verification only)
+ ifm_index = inputs[0];
+ axis_index = inputs[1];
+
+ // Each output should be interpreted as follows:
+ // [0, outputCount) -> An n-D tensor.
+ for (uint32_t n = 0; n < outputCount; ++n)
+ {
+ ofm_indexes.emplace_back(outputs[n]);
+ }
+}
+
+} // namespace Split
+} // namespace op
+} // namespace tflite
+} // namespace internal
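
Split is the first operation in this patch with a variable number of outputs, which is why its `Param` carries a vector rather than a single index. A made-up decoding example:

#include "internal/op/Split.h"

using internal::tflite::op::Split::Param;

// Hypothetical operands: tensor #4 split along the axis held by operand #5
// into three outputs (#6, #7, #8); operand #9 carries num_splits, which the
// constructor deliberately ignores.
const uint32_t inputs[] = {4, 5, 9};
const uint32_t outputs[] = {6, 7, 8};

Param param(3, inputs, 3, outputs);
// param.ifm_index == 4, param.axis_index == 5, param.ofm_indexes == {6, 7, 8}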
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Split.h b/runtime/contrib/pure_arm_compute/src/internal/op/Split.h
new file mode 100644
index 000000000..cb5f3eb2d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Split.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Split.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines Split node
+ */
+
+#ifndef __INTERNAL_OP_SPLIT_H__
+#define __INTERNAL_OP_SPLIT_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Split
+{
+
+/**
+ * @brief Struct to manipulate parameter for Split operation
+ */
+struct Param
+{
+ int32_t ifm_index; //!< index for input feature map
+ int32_t axis_index; //!< index for axis
+
+ std::vector<int32_t> ofm_indexes; //!< indexes for output feature maps
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Split Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Split Node object
+ * @param param Parameter for Split Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Split node
+};
+
+} // namespace Split
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SPLIT_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.cc b/runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.cc
new file mode 100644
index 000000000..f6c8bc5df
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/SquaredDifference.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SquaredDifference
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace SquaredDifference
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SquaredDifference
+{
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ lhs_index = inputs[0];
+ rhs_index = inputs[1];
+}
+
+} // namespace SquaredDifference
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.h b/runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.h
new file mode 100644
index 000000000..ecbb03209
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/SquaredDifference.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file SquaredDifference.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::SquaredDifference::Param struct
+ * and internal::tflite::op::SquaredDifference::Node class
+ */
+#ifndef __INTERNAL_OP_SQUAREDDIFFERENCE_H__
+#define __INTERNAL_OP_SQUAREDDIFFERENCE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace SquaredDifference
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Index of output feature map */
+
+ int32_t lhs_index; /**< Index of lhs */
+ int32_t rhs_index; /**< Index of rhs */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for SquaredDifference
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace SquaredDifference
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SQUAREDDIFFERENCE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.cc
new file mode 100644
index 000000000..6e89cd321
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Squeeze.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Squeeze
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Squeeze
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Squeeze
+{
+// dims_index is an optional input;
+// if it is not provided, dims_index keeps its default value of -1
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 || inputCount == 2);
+ assert(outputCount == 1);
+
+ output_index = outputs[0];
+
+ input_index = inputs[0];
+
+ // dims_index_optional = -1 by default
+ if (inputCount == 2)
+ dims_index_optional = inputs[1];
+}
+
+} // namespace Squeeze
+} // namespace op
+} // namespace tflite
+} // namespace internal
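
Squeeze is the only constructor in this batch with a variable input count, so both paths are worth spelling out (hypothetical operand indexes):

#include "internal/op/Squeeze.h"

using internal::tflite::op::Squeeze::Param;

const uint32_t outputs[] = {2};

// With the optional dims operand present:
const uint32_t with_dims[] = {0, 1};
Param a(2, with_dims, 1, outputs); // a.dims_index_optional == 1

// Without it, the member keeps its in-class default:
const uint32_t without_dims[] = {0};
Param b(1, without_dims, 1, outputs); // b.dims_index_optional == -1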
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.h b/runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.h
new file mode 100644
index 000000000..d5f36f85f
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Squeeze.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Squeeze.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::Squeeze::Param struct
+ * and internal::tflite::op::Squeeze::Node class
+ */
+#ifndef __INTERNAL_OP_SQUEEZE_H__
+#define __INTERNAL_OP_SQUEEZE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Squeeze
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+ int32_t output_index; /**< Index of output feature map */
+
+ int32_t input_index; /**< Index of input feature map */
+ // optional param. default is -1
+ int32_t dims_index_optional = -1; /**< Index of dims */
+ /**
+ * @brief Construct as default
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object with params
+ * @param[in] inputCount Count of inputs
+ * @param[in] inputs Pointer of inputs
+ * @param[in] outputCount Count of outputs
+ * @param[in] outputs Pointer of outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Squeeze
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object with param
+ * @param[in] param Param object that makes up a Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destruct as default
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get a reference of Param object
+ * @return Reference of Param object
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Visit this Node by NodeVisitor
+ * @param[in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Squeeze
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SQUEEZE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.cc b/runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.cc
new file mode 100644
index 000000000..6e7958954
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/StridedSlice.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace StridedSlice
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace StridedSlice
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace StridedSlice
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 7 && outputCount == 1);
+
+ outputData_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> An n-D tensor, specifying the tensor to be sliced.
+ // 1 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the starts of
+ // the dimensions of the input tensor to be sliced. The length must be
+ // of rank(input0).
+ // 2 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the ends of
+ // the dimensions of the input tensor to be sliced. The length must be
+ // of rank(input0).
+ // 3 -> A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the strides of
+ // the dimensions of the input tensor to be sliced. The length must be
+ // of rank(input0).
+ // 4 -> An {@link ANEURALNETWORKS_INT32} scalar, begin_mask. If the ith bit
+ // of begin_mask is set, begin[i] is ignored and the fullest possible
+ // range in that dimension is used instead.
+ // 5 -> An {@link ANEURALNETWORKS_INT32} scalar, end_mask. If the ith bit of
+ // end_mask is set, end[i] is ignored and the fullest possible range in
+ // that dimension is used instead.
+ // 6 -> An {@link ANEURALNETWORKS_INT32} scalar, shrink_axis_mask. An int32
+ // mask. If the ith bit of shrink_axis_mask is set, it implies that the
+ // ith specification shrinks the dimensionality by 1. A slice of size 1
+ // starting from begin[i] in the dimension must be preserved.
+ inputData_index = inputs[0];
+ startData_index = inputs[1];
+ endData_index = inputs[2];
+ stridesData_index = inputs[3];
+ beginMask_index = inputs[4];
+ endMask_index = inputs[5];
+ shrinkAxisMask_index = inputs[6];
+}
+
+} // namespace StridedSlice
+} // namespace op
+} // namespace tflite
+} // namespace internal
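
The three mask operands are bitfields with one bit per dimension. A sketch of how a backend might apply begin_mask, under the assumption of positive strides (with a negative stride, the "fullest possible range" would instead start from the far end of the dimension):

#include <cstddef>
#include <cstdint>
#include <vector>

// If bit i of begin_mask is set, begin[i] is ignored and the slice starts
// at 0 in dimension i (for positive strides).
std::vector<int32_t> effective_begin(const std::vector<int32_t> &begin,
                                     int32_t begin_mask)
{
  std::vector<int32_t> out(begin.size());
  for (std::size_t i = 0; i < begin.size(); ++i)
  {
    const bool ignore = (begin_mask >> i) & 1;
    out[i] = ignore ? 0 : begin[i];
  }
  return out;
}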
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.h b/runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.h
new file mode 100644
index 000000000..21dbb9e68
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/StridedSlice.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file StridedSlice.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines StridedSlice node
+ */
+
+#ifndef __INTERNAL_OP_STRIDEDSLICE_H__
+#define __INTERNAL_OP_STRIDEDSLICE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace StridedSlice
+{
+
+/**
+ * @brief Struct to manipulate parameter for StridedSlice operation
+ */
+struct Param
+{
+ int32_t outputData_index; //!< index for output data
+
+ int32_t inputData_index; //!< index for input data
+ int32_t startData_index; //!< index where slicing starts
+ int32_t endData_index; //!< index where slicing ends
+ int32_t stridesData_index; //!< index for stride values
+ int32_t beginMask_index; //!< index for begin mask
+ int32_t endMask_index; //!< index for end mask
+ int32_t shrinkAxisMask_index; //!< index for shrink axis mask
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define StridedSlice Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new StridedSlice Node object
+ * @param param Parameter for StridedSlice Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for StridedSlice node
+};
+
+} // namespace StridedSlice
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_STRIDEDSLICE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Sub.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Sub.cc
new file mode 100644
index 000000000..74efe3c3a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Sub.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Sub.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Sub
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Sub
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Sub
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ // 2 -> Activation Index
+ lhs_index = inputs[0];
+ rhs_index = inputs[1];
+ activation_index = inputs[2];
+}
+
+} // namespace Sub
+} // namespace op
+} // namespace tflite
+} // namespace internal
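
As elsewhere, `activation_index` names the operand holding the activation selector; the backend reads the scalar later. That scalar is an NNAPI FuseCode choosing a fused activation applied to the subtraction result. An illustrative decoding, using the standard NNAPI FuseCode values:

#include <algorithm>
#include <cstdint>

float apply_fused_activation(float v, int32_t fuse_code)
{
  switch (fuse_code)
  {
    case 0:  return v;                                   // ANEURALNETWORKS_FUSED_NONE
    case 1:  return std::max(v, 0.0f);                   // ANEURALNETWORKS_FUSED_RELU
    case 2:  return std::min(std::max(v, -1.0f), 1.0f);  // ANEURALNETWORKS_FUSED_RELU1
    case 3:  return std::min(std::max(v, 0.0f), 6.0f);   // ANEURALNETWORKS_FUSED_RELU6
    default: return v; // unknown codes passed through in this sketch
  }
}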
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Sub.h b/runtime/contrib/pure_arm_compute/src/internal/op/Sub.h
new file mode 100644
index 000000000..864359d1e
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Sub.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Sub.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines SUB Node
+ */
+
+#ifndef __INTERNAL_OP_SUB_H__
+#define __INTERNAL_OP_SUB_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Sub
+{
+
+/**
+ * @brief Struct to manipulate parameters for SUB
+ */
+struct Param
+{
+ int32_t ofm_index; //!< index for output feature map
+
+ int32_t lhs_index; //!< index for left-hand side
+ int32_t rhs_index; //!< index for right-hand side
+ int32_t activation_index; //!< index for activation function
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define SUB Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Sub Node object
+ * @param param Parameter for Sub Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for SUB node
+};
+
+} // namespace Sub
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_SUB_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Tanh.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Tanh.cc
new file mode 100644
index 000000000..fbd72abe4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Tanh.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Tanh.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Tanh
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Tanh
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Tanh
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 1 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ ifm_index = inputs[0];
+}
+
+} // namespace Tanh
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Tanh.h b/runtime/contrib/pure_arm_compute/src/internal/op/Tanh.h
new file mode 100644
index 000000000..fd87297f1
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Tanh.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Tanh.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines TANH node
+ */
+
+#ifndef __INTERNAL_OP_TANH_H__
+#define __INTERNAL_OP_TANH_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Tanh
+{
+
+/**
+ * @brief Struct to manipulate parameter for hyperbolic tangent operation
+ */
+struct Param
+{
+ int32_t ofm_index; //!< index for output feature map
+
+ int32_t ifm_index; //!< index for input feature map
+
+ /**
+ * @brief Default Constructor
+ */
+ Param() = default;
+ /**
+ * @brief Construct a new Param object
+ * @param[in] inputCount the number of inputs
+ * @param[in] inputs pointer for input data
+ * @param[in] outputCount the number of outputs
+ * @param[in] outputs pointer for output data
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define Tanh Operation
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Tanh Node object
+ * @param param Parameter for Tanh Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Default Destructor
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameter
+ * @return Param reference
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Accept a NodeVisitor so that it can visit this node
+ * @param [in] v Visitor
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param; //!< parameter for Tanh node
+};
+
+} // namespace Tanh
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_TANH_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.cc b/runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.cc
new file mode 100644
index 000000000..74d9a69d2
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/TopKV2.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TopKV2
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace TopKV2
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TopKV2
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 2);
+
+ // Each output should be interpreted as follows:
+ //
+ // 0 -> Index for Output Values
+ // 1 -> Index for Output Indices
+ outputValues_index = outputs[0];
+ outputIndices_index = outputs[1];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Index for Input Data
+ // 1 -> Index for K
+ inputData_index = inputs[0];
+ k_index = inputs[1];
+}
+
+} // namespace TopKV2
+} // namespace op
+} // namespace tflite
+} // namespace internal
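
TopKV2 is the only operation here with two outputs, and the split between them mirrors the usual top-k contract: one tensor of values, one of positions. The reference semantics as a standalone sketch (not patch code; assumes 0 < k <= input.size()):

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

// Returns the k largest values and their indexes in the input,
// in descending order of value.
std::pair<std::vector<float>, std::vector<int32_t>>
top_k(const std::vector<float> &input, int32_t k)
{
  std::vector<int32_t> idx(input.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&](int32_t a, int32_t b) { return input[a] > input[b]; });
  idx.resize(k);

  std::vector<float> values(k);
  for (int32_t i = 0; i < k; ++i)
    values[i] = input[idx[i]];
  return {values, idx};
}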
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.h b/runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.h
new file mode 100644
index 000000000..02b7827e9
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/TopKV2.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TopKV2.h
+ * @brief This file contains accept function and params for TopKV2 operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_TOPKV2_H__
+#define __INTERNAL_OP_TOPKV2_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TopKV2
+{
+
+/**
+ * @brief Struct of TopKV2 operation's param
+ */
+struct Param
+{
+ int32_t outputValues_index; /**< Output values index */
+ int32_t outputIndices_index; /**< Output indices index */
+
+ int32_t inputData_index; /**< Input data index */
+ int32_t k_index; /**< K value index */
+
+ /**
+ * @brief Construct a new Param object for TopKV2 as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for TopKV2 with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for TopKV2
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for TopKV2 with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for TopKV2
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for TopKV2
+ * @return Parameters of TopKV2
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for TopKV2
+ * @param [in] v Node visitor for invoking visit function of TopKV2
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace TopKV2
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_TOPKV2_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Transpose.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Transpose.cc
new file mode 100644
index 000000000..0529e3790
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Transpose.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Transpose.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Transpose
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Transpose
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Transpose
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 2 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ // 1 -> Permutation Tensor Index
+ ifm_index = inputs[0];
+ permu_index = inputs[1];
+}
+
+} // namespace Transpose
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Transpose.h b/runtime/contrib/pure_arm_compute/src/internal/op/Transpose.h
new file mode 100644
index 000000000..bb01bf322
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Transpose.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Transpose.h
+ * @brief This file contains accept function and params for Transpose operation
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __INTERNAL_OP_TRANSPOSE_H__
+#define __INTERNAL_OP_TRANSPOSE_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Transpose
+{
+
+/**
+ * @brief Struct of Transpose operation's param
+ */
+struct Param
+{
+ int32_t ofm_index; /**< Output feature map index */
+
+ int32_t ifm_index; /**< Input feature map index */
+ int32_t permu_index; /**< Permutation tensor index */
+
+ /**
+ * @brief Construct a new Param object for Transpose as default
+ */
+ Param() = default;
+
+ /**
+ * @brief Construct a new Param object for Transpose with params
+ * @param [in] inputCount The number of input
+ * @param [in] inputs Array containing inputs
+ * @param [in] outputCount The number of output
+ * @param [in] outputs Array containing outputs
+ */
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to define operation node for Transpose
+ */
+class Node final : public op::Node
+{
+public:
+ /**
+ * @brief Construct a new Node object for Transpose with param
+ * @param [in] param Parameters for Node
+ */
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ /**
+ * @brief Destroy the Node object for Transpose
+ */
+ virtual ~Node() = default;
+
+public:
+ /**
+ * @brief Get parameters for Transpose
+ * @return Parameters of Transpose
+ */
+ const Param &param(void) const { return _param; }
+
+public:
+ /**
+ * @brief Function for accepting node for Transpose
+ * @param [in] v Node visitor for invoking visit function of Transpose
+ * @return N/A
+ */
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Transpose
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_TRANSPOSE_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.cc b/runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.cc
new file mode 100644
index 000000000..502eff525
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/TransposeConv.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TransposeConv
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace TransposeConv
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TransposeConv
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 6 && outputCount == 1);
+
+ ofm_index = outputs[0];
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Output Shape Index
+ // 1 -> Weights Index
+ // 2 -> Input Tensor Index
+ // 3 -> Padding Type
+ // 4 -> Stride width
+ // 5 -> Stride height
+
+ op_shape_index = inputs[0];
+ ker_index = inputs[1];
+ ifm_index = inputs[2];
+ padding_index = inputs[3];
+ hstride_index = inputs[4];
+ vstride_index = inputs[5];
+}
+
+} // namespace TransposeConv
+} // namespace op
+} // namespace tflite
+} // namespace internal
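
Worth flagging a quirk visible in the mapping above: the first *input* operand is the desired output shape. That follows from deconvolution semantics, where stride and padding alone leave the spatial output size ambiguous, so the TFLite/NNAPI convention passes the shape explicitly. A made-up decoding:

#include "internal/op/TransposeConv.h"

using internal::tflite::op::TransposeConv::Param;

// Hypothetical operands, in the documented order: output-shape tensor #0,
// kernel #1, input #2, padding type #3, stride width #4, stride height #5;
// the actual result tensor lives at #6.
const uint32_t inputs[] = {0, 1, 2, 3, 4, 5};
const uint32_t outputs[] = {6};

Param param(6, inputs, 1, outputs);
// param.op_shape_index == 0 names the operand holding the requested output
// shape, not the output itself (that is param.ofm_index == 6).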
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.h b/runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.h
new file mode 100644
index 000000000..b0122f82d
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/TransposeConv.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_TRANSPOSECONV_H__
+#define __INTERNAL_OP_TRANSPOSECONV_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace TransposeConv
+{
+
+struct Param
+{
+ int32_t ofm_index;
+
+ int32_t op_shape_index;
+ int32_t ker_index;
+ int32_t ifm_index;
+ int32_t padding_index;
+ int32_t hstride_index;
+ int32_t vstride_index;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace TransposeConv
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_TRANSPOSECONV_H__
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Unpack.cc b/runtime/contrib/pure_arm_compute/src/internal/op/Unpack.cc
new file mode 100644
index 000000000..a1be0280c
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Unpack.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/Unpack.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Unpack
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Unpack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Unpack
+{
+// There are three inputs: the tensor to be unpacked, the number of
+// splits, and the axis along which the tensor is unpacked, in the
+// order assigned below.
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ assert(inputCount == 3);
+
+ ifm_index = inputs[0];
+
+ for (uint32_t n = 0; n < outputCount; ++n)
+ {
+ ofm_indexes.emplace_back(outputs[n]);
+ }
+ num_split_index = inputs[1];
+ axis_index = inputs[2];
+}
+
+} // namespace Unpack
+} // namespace op
+} // namespace tflite
+} // namespace internal
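Mirroring that constructor, an Unpack Param takes three input indices plus one output index per slice; a sketch with hypothetical index values:

#include "internal/op/Unpack.h"

void build_unpack_param()
{
  // Hypothetical operand indices; order matches the constructor above
  const uint32_t inputs[3] = {
      0, // Tensor to be unpacked
      1, // Number of splits
      2, // Axis along which to unpack
  };
  const uint32_t outputs[4] = {3, 4, 5, 6}; // one index per unpacked slice

  internal::tflite::op::Unpack::Param param(3, inputs, 4, outputs);
  (void)param;
}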
diff --git a/runtime/contrib/pure_arm_compute/src/internal/op/Unpack.h b/runtime/contrib/pure_arm_compute/src/internal/op/Unpack.h
new file mode 100644
index 000000000..575e3d024
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/internal/op/Unpack.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_OP_UNPACK_H__
+#define __INTERNAL_OP_UNPACK_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace Unpack
+{
+
+struct Param
+{
+ int32_t ifm_index;
+ int32_t axis_index;
+ int32_t num_split_index;
+ // There are N outputs after unpacking the input tensor along the axis
+ std::vector<int32_t> ofm_indexes;
+
+ Param() = default;
+ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+class Node final : public op::Node
+{
+public:
+ Node(const Param &param) : _param(param)
+ {
+ // DO NOTHING
+ }
+
+public:
+ virtual ~Node() = default;
+
+public:
+ const Param &param(void) const { return _param; }
+
+public:
+ void accept(NodeVisitor &&) const override;
+
+private:
+ const Param _param;
+};
+
+} // namespace Unpack
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_UNPACK_H__
diff --git a/runtime/contrib/pure_arm_compute/src/library_info.cc b/runtime/contrib/pure_arm_compute/src/library_info.cc
new file mode 100644
index 000000000..02a616094
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/library_info.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+volatile static const char info[] = "library information : runtime=pure_arm_compute";
diff --git a/runtime/contrib/pure_arm_compute/src/logging.h b/runtime/contrib/pure_arm_compute/src/logging.h
new file mode 100644
index 000000000..447da03e2
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/logging.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file logging.h
+ * @brief This file contains Context class for logging.
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __PURE_ARM_COMPUTE_LOGGING_H__
+#define __PURE_ARM_COMPUTE_LOGGING_H__
+
+#include <cstdlib>
+#include <iostream>
+
+namespace logging
+{
+
+/**
+ * @brief class to define Context for logging
+ */
+class Context
+{
+public:
+ /**
+ * @brief Construct a default Context; logging is enabled when the
+ * PURE_ARM_COMPUTE_LOG_ENABLE environment variable is set to a positive value
+ */
+ Context() : _enabled{false}
+ {
+ auto env = std::getenv("PURE_ARM_COMPUTE_LOG_ENABLE");
+
+ if (env && std::strtol(env, NULL, 0) > 0)
+ {
+ _enabled = true;
+ }
+ }
+
+public:
+ /**
+ * @brief Check whether logging is enabled
+ * @return @c true if the PURE_ARM_COMPUTE_LOG_ENABLE environment variable has
+ * been set to a positive value, otherwise @c false
+ */
+ bool enabled(void) const { return _enabled; }
+
+private:
+ bool _enabled;
+};
+
+/**
+ * @brief static Context class for logging
+ */
+static Context ctx;
+
+} // namespace logging
+
+#define VERBOSE(name) \
+ if (::logging::ctx.enabled()) \
+ std::cout << "[" << #name << "] "
+
+#endif // __PURE_ARM_COMPUTE_LOGGING_H__
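For reference, a VERBOSE call site looks like the sketch below; the tag and message are arbitrary, and the stream expression only runs when the environment variable enables the context:

#include "logging.h"

void report(int operand_count)
{
  // Prints "[MODEL] operand count: N" only when
  // PURE_ARM_COMPUTE_LOG_ENABLE is set to a positive value
  VERBOSE(MODEL) << "operand count: " << operand_count << std::endl;
}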
diff --git a/runtime/contrib/pure_arm_compute/src/memory.cc b/runtime/contrib/pure_arm_compute/src/memory.cc
new file mode 100644
index 000000000..9e999661a
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/memory.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+#include <sys/mman.h>
+#include <memory>
+
+#include "cpp14/memory.h"
+#include "memory.h"
+
+int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
+ ANeuralNetworksMemory **memory)
+{
+ if (memory == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ // Use unique pointer to avoid memory leak
+ std::unique_ptr<ANeuralNetworksMemory> memory_ptr =
+ nnfw::cpp14::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset);
+ if (memory_ptr == nullptr)
+ {
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+ *memory = memory_ptr.release();
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksMemory_free(ANeuralNetworksMemory *memory) { delete memory; }
+
+//
+// ANeuralNetworksMemory
+//
+ANeuralNetworksMemory::ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset)
+{
+ _base = reinterpret_cast<uint8_t *>(mmap(nullptr, size, protect, MAP_PRIVATE, fd, offset));
+ _size = size;
+}
+
+ANeuralNetworksMemory::~ANeuralNetworksMemory() { munmap(reinterpret_cast<void *>(_base), _size); }
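A plausible call sequence follows, assuming a hypothetical weights.bin file of at least 4096 bytes. Since the constructor above does not validate mmap's result, careful callers can inspect base() (declared in memory.h) before use:

#include <NeuralNetworks.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

void map_weights()
{
  // "weights.bin" and the 4096-byte size are placeholders
  int fd = open("weights.bin", O_RDONLY);
  ANeuralNetworksMemory *memory = nullptr;

  if (ANeuralNetworksMemory_createFromFd(4096, PROT_READ, fd, 0, &memory) ==
      ANEURALNETWORKS_NO_ERROR)
  {
    // ... pass 'memory' to ANeuralNetworksModel_setOperandValueFromMemory ...
    ANeuralNetworksMemory_free(memory);
  }

  close(fd);
}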
diff --git a/runtime/contrib/pure_arm_compute/src/memory.h b/runtime/contrib/pure_arm_compute/src/memory.h
new file mode 100644
index 000000000..ffac26ef6
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/memory.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file memory.h
+ * @brief This file defines ANeuralNetworksMemory class for handling Memory NNAPI
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __MEMORY_H__
+#define __MEMORY_H__
+
+#include <cstdint>
+
+/**
+ * @brief struct to define Memory NNAPI
+ */
+struct ANeuralNetworksMemory
+{
+public:
+ /**
+ * @brief Constructor with params
+ * @param [in] size The requested size in bytes
+ * @param [in] protect The desired memory protection for the mapping
+ * @param [in] fd The requested file descriptor
+ * @param [in] offset The offset to the beginning of the file of the area to map
+ */
+ ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset);
+ /**
+ * @brief Destructor; unmaps the mapped region
+ */
+ ~ANeuralNetworksMemory();
+
+public:
+ /**
+ * @brief Get size
+ * @return size
+ */
+ size_t size(void) const { return _size; }
+ /**
+ * @brief Get base pointer
+ * @return base pointer
+ */
+ uint8_t *base(void) { return _base; }
+ /**
+ * @brief Get base pointer
+ * @return const base pointer
+ */
+ const uint8_t *base(void) const { return _base; }
+
+private:
+ size_t _size;
+ uint8_t *_base;
+};
+
+#endif // __MEMORY_H__
diff --git a/runtime/contrib/pure_arm_compute/src/model.cc b/runtime/contrib/pure_arm_compute/src/model.cc
new file mode 100644
index 000000000..ddca589db
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/model.cc
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+#include <NeuralNetworksEx.h>
+
+#include <cassert>
+#include <stdexcept>
+
+#include "model.h"
+#include "memory.h"
+
+int ANeuralNetworksModel_create(ANeuralNetworksModel **model)
+{
+ if (model == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ ANeuralNetworksModel *model_ptr = new ANeuralNetworksModel{};
+
+ if (model_ptr == nullptr)
+ {
+ return ANEURALNETWORKS_OUT_OF_MEMORY;
+ }
+
+ *model = model_ptr;
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+void ANeuralNetworksModel_free(ANeuralNetworksModel *model) { delete model; }
+
+int ANeuralNetworksModel_addOperand(ANeuralNetworksModel *model,
+ const ANeuralNetworksOperandType *type)
+{
+ if ((model == nullptr) || (type == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ if (type->type == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM)
+ {
+ // Quantized:
+ // scale: a 32 bit floating point value greater than zero
+ // zeroPoint: a 32 bit integer, in range [0, 255]
+ if (type->scale <= 0.0f)
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ if (type->zeroPoint < 0 || type->zeroPoint > 255)
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
+ // NOTE Validation of scale and zeroPoint is skipped for now.
+ // We do not know whether a scalar type can have scale and zeroPoint.
+ // To pass ValidationTest and GeneratedTest, this validation code
+ // is left unimplemented until the issue is clearly defined.
+ //
+ // scale and zeroPoint should be zero for scalars and non-fixed point tensors
+ // else if ((type->scale != 0.0f) || (type->zeroPoint != 0))
+ // {
+ // return ANEURALNETWORKS_BAD_DATA;
+ // }
+
+ // scalar is ANEURALNETWORKS_FLOAT32, ANEURALNETWORKS_INT32 or ANEURALNETWORKS_UINT32.
+ // ANEURALNETWORKS_TENSOR_FLOAT32, ANEURALNETWORKS_TENSOR_INT32 and
+ // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM are not scalar
+ //
+ // dimensionCount should be zero for scalars
+ if (type->dimensionCount != 0 &&
+ (type->type == ANEURALNETWORKS_FLOAT32 || type->type == ANEURALNETWORKS_INT32 ||
+ type->type == ANEURALNETWORKS_UINT32))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ // ASSUME A tensor operand should consist of fp32, int32, or quant8 values,
+ // as the assert below encodes.
+ // NOTE We do not care about scalar operands.
+ assert((type->dimensionCount == 0) || (type->type == ANEURALNETWORKS_TENSOR_FLOAT32 ||
+ type->type == ANEURALNETWORKS_TENSOR_INT32 ||
+ type->type == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM));
+
+ internal::tflite::operand::Shape shape(type->dimensionCount);
+
+ for (uint32_t axis = 0; axis < type->dimensionCount; ++axis)
+ {
+ shape.dim(axis) = type->dimensions[axis];
+ }
+
+ model->deref().operands().append(shape, type->type, type->scale, type->zeroPoint);
+
+ // NOTE We do NOT allocate CLTensor here as we do not know how to interpret this one.
+ // TensorFlow Lite may interpret a rank-4 tensor either as a feature map (with batch) or
+ // a convolution kernel.
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel *model, int32_t index,
+ const void *buffer, size_t length)
+{
+ if ((model == nullptr) || ((buffer == nullptr) && (length != 0)))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const internal::tflite::operand::Index ind{index};
+ auto &obj = model->deref().operands().at(ind);
+
+ if (buffer == nullptr)
+ {
+ using internal::tflite::operand::ExternalData;
+ obj.data<ExternalData>(reinterpret_cast<const uint8_t *>(buffer), length);
+ }
+ else
+ {
+ using internal::tflite::operand::CachedData;
+ obj.data<CachedData>(reinterpret_cast<const uint8_t *>(buffer), length);
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel *model, int32_t index,
+ const ANeuralNetworksMemory *memory,
+ size_t offset, size_t length)
+{
+ if ((model == nullptr) || (memory == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ const internal::tflite::operand::Index ind{index};
+ auto &obj = model->deref().operands().at(ind);
+
+ using internal::tflite::operand::ExternalData;
+
+ obj.data<ExternalData>(reinterpret_cast<const uint8_t *>(memory->base() + offset), length);
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ if (model == nullptr || inputs == nullptr || outputs == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ if (type < ANEURALNETWORKS_ADD || type > ANEURALNETWORKS_TRANSPOSE)
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ switch (type)
+ {
+ case ANEURALNETWORKS_ADD:
+ {
+ assert(inputCount == 3);
+ assert(outputCount == 1);
+
+ using internal::tflite::op::Add::Param;
+ using internal::tflite::op::Add::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SUB:
+ {
+ assert(inputCount == 3);
+ assert(outputCount == 1);
+
+ using internal::tflite::op::Sub::Param;
+ using internal::tflite::op::Sub::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_MUL:
+ {
+ assert(inputCount == 3);
+ assert(outputCount == 1);
+
+ using internal::tflite::op::Mul::Param;
+ using internal::tflite::op::Mul::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_DIV:
+ {
+ assert(inputCount == 3);
+ assert(outputCount == 1);
+
+ using internal::tflite::op::Div::Param;
+ using internal::tflite::op::Div::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_CONV_2D:
+ {
+ // inputCount is either 7 or 10 according to the NN API specification.
+ // - Padding is implicit when inputCount is 7
+ // - Padding is explicit when inputCount is 10
+ assert(inputCount == 7 || inputCount == 10);
+ assert(outputCount == 1);
+
+ if (inputCount == 7)
+ {
+ using internal::tflite::op::Conv2D::Implicit::Param;
+ using internal::tflite::op::Conv2D::Implicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+ else
+ {
+ using internal::tflite::op::Conv2D::Explicit::Param;
+ using internal::tflite::op::Conv2D::Explicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+
+ break;
+ }
+ case ANEURALNETWORKS_DEPTHWISE_CONV_2D:
+ {
+ // inputCount is either 8 or 11 according to the NN API specification.
+ // - Padding is implicit when inputCount is 8
+ // - Padding is explicit when inputCount is 11
+ assert(inputCount == 8 || inputCount == 11);
+ assert(outputCount == 1);
+
+ if (inputCount == 8)
+ {
+ using internal::tflite::op::DepthwiseConv2D::Implicit::Param;
+ using internal::tflite::op::DepthwiseConv2D::Implicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+ else
+ {
+ using internal::tflite::op::DepthwiseConv2D::Explicit::Param;
+ using internal::tflite::op::DepthwiseConv2D::Explicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+
+ break;
+ }
+ case ANEURALNETWORKS_MAX_POOL_2D:
+ {
+ // inputCount is either 7 or 10 according to the NN API specification.
+ // - Padding is implicit when inputCount is 7
+ // - Padding is explicit when inputCount is 10
+ assert(inputCount == 7 || inputCount == 10);
+ assert(outputCount == 1);
+
+ if (inputCount == 7)
+ {
+ using internal::tflite::op::MaxPool2D::Implicit::Param;
+ using internal::tflite::op::MaxPool2D::Implicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+ else
+ {
+ using internal::tflite::op::MaxPool2D::Explicit::Param;
+ using internal::tflite::op::MaxPool2D::Explicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+
+ break;
+ }
+ case ANEURALNETWORKS_DEQUANTIZE:
+ {
+ assert(outputCount == 1 && inputCount == 1);
+ using internal::tflite::op::Dequantize::Param;
+ using internal::tflite::op::Dequantize::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_AVERAGE_POOL_2D:
+ {
+ // inputCount is either 7 or 10 according to the NN API specification.
+ // - Padding is implicit when inputCount is 7
+ // - Padding is explicit when inputCount is 10
+ assert(inputCount == 7 || inputCount == 10);
+ assert(outputCount == 1);
+
+ if (inputCount == 7)
+ {
+ using internal::tflite::op::AvgPool2D::Implicit::Param;
+ using internal::tflite::op::AvgPool2D::Implicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+ else
+ {
+ using internal::tflite::op::AvgPool2D::Explicit::Param;
+ using internal::tflite::op::AvgPool2D::Explicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+
+ break;
+ }
+ case ANEURALNETWORKS_CONCATENATION:
+ {
+ using internal::tflite::op::Concat::Param;
+ using internal::tflite::op::Concat::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RESIZE_BILINEAR:
+ {
+ using internal::tflite::op::ResizeBilinear::Param;
+ using internal::tflite::op::ResizeBilinear::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RESHAPE:
+ {
+ using internal::tflite::op::Reshape::Param;
+ using internal::tflite::op::Reshape::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SQUEEZE:
+ {
+ using internal::tflite::op::Squeeze::Param;
+ using internal::tflite::op::Squeeze::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_FULLY_CONNECTED:
+ {
+ using internal::tflite::op::FullyConnected::Param;
+ using internal::tflite::op::FullyConnected::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SOFTMAX:
+ {
+ using internal::tflite::op::Softmax::Param;
+ using internal::tflite::op::Softmax::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RELU:
+ {
+ using internal::tflite::op::ReLU::Param;
+ using internal::tflite::op::ReLU::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RELU1:
+ {
+ using internal::tflite::op::ReLU1::Param;
+ using internal::tflite::op::ReLU1::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RELU6:
+ {
+ using internal::tflite::op::ReLU6::Param;
+ using internal::tflite::op::ReLU6::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_TANH:
+ {
+ using internal::tflite::op::Tanh::Param;
+ using internal::tflite::op::Tanh::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_STRIDED_SLICE:
+ {
+ using internal::tflite::op::StridedSlice::Param;
+ using internal::tflite::op::StridedSlice::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOGISTIC:
+ {
+ using internal::tflite::op::Logistic::Param;
+ using internal::tflite::op::Logistic::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_MEAN:
+ {
+ using internal::tflite::op::Mean::Param;
+ using internal::tflite::op::Mean::Node;
+
+ auto &operations = model->deref().operations();
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RNN:
+ {
+ using internal::tflite::op::RNN::Param;
+ using internal::tflite::op::RNN::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_TRANSPOSE:
+ {
+ using internal::tflite::op::Transpose::Param;
+ using internal::tflite::op::Transpose::Node;
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LSTM:
+ {
+ using internal::tflite::op::LSTM::Param;
+ using internal::tflite::op::LSTM::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_FLOOR:
+ {
+ using internal::tflite::op::Floor::Param;
+ using internal::tflite::op::Floor::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_PAD:
+ {
+ assert(inputCount == 2 && outputCount == 1);
+
+ using internal::tflite::op::Pad::Param;
+ using internal::tflite::op::Pad::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SPACE_TO_DEPTH:
+ {
+ using internal::tflite::op::SpaceToDepth::Param;
+ using internal::tflite::op::SpaceToDepth::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SPACE_TO_BATCH_ND:
+ {
+ using internal::tflite::op::SpaceToBatchND::Param;
+ using internal::tflite::op::SpaceToBatchND::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_BATCH_TO_SPACE_ND:
+ {
+ using internal::tflite::op::BatchToSpaceNd::Param;
+ using internal::tflite::op::BatchToSpaceNd::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_L2_POOL_2D:
+ {
+ // Input count is 7 for Implicit Padding
+ // Input count is 10 for Explicit Padding
+ assert(inputCount == 7 || inputCount == 10);
+ assert(outputCount == 1);
+
+ if (inputCount == 7)
+ {
+ using internal::tflite::op::L2Pool2D::Implicit::Param;
+ using internal::tflite::op::L2Pool2D::Implicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+ else
+ {
+ using internal::tflite::op::L2Pool2D::Explicit::Param;
+ using internal::tflite::op::L2Pool2D::Explicit::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+ }
+
+ break;
+ }
+ case ANEURALNETWORKS_EMBEDDING_LOOKUP:
+ {
+ assert(inputCount == 2);
+ assert(outputCount == 1);
+
+ using internal::tflite::op::EmbeddingLookup::Param;
+ using internal::tflite::op::EmbeddingLookup::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_L2_NORMALIZATION:
+ {
+ assert(inputCount == 1 && outputCount == 1);
+
+ using internal::tflite::op::L2Normalization::Param;
+ using internal::tflite::op::L2Normalization::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_HASHTABLE_LOOKUP:
+ {
+ assert(inputCount == 3);
+ assert(outputCount == 2);
+
+ using internal::tflite::op::HashtableLookup::Param;
+ using internal::tflite::op::HashtableLookup::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION:
+ {
+
+ using internal::tflite::op::LocalResponseNormalization::Param;
+ using internal::tflite::op::LocalResponseNormalization::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_DEPTH_TO_SPACE:
+ {
+ using internal::tflite::op::DepthToSpace::Param;
+ using internal::tflite::op::DepthToSpace::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ default:
+ throw std::runtime_error{"Not supported operation"};
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model,
+ ANeuralNetworksOperationTypeEx type, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ switch (type)
+ {
+ case ANEURALNETWORKS_CAST_EX:
+ {
+ using internal::tflite::op::Cast::Param;
+ using internal::tflite::op::Cast::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_REDUCE_MIN_EX:
+ {
+ using internal::tflite::op::ReduceMin::Param;
+ using internal::tflite::op::ReduceMin::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_REDUCE_MAX_EX:
+ {
+ using internal::tflite::op::ReduceMax::Param;
+ using internal::tflite::op::ReduceMax::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_PRELU_EX:
+ {
+ using internal::tflite::op::PReLU::Param;
+ using internal::tflite::op::PReLU::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_TRANSPOSE_CONV_EX:
+ {
+ using internal::tflite::op::TransposeConv::Param;
+ using internal::tflite::op::TransposeConv::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOGICAL_AND_EX:
+ {
+ using internal::tflite::op::LogicalAnd::Param;
+ using internal::tflite::op::LogicalAnd::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOGICAL_OR_EX:
+ {
+ using internal::tflite::op::LogicalOr::Param;
+ using internal::tflite::op::LogicalOr::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_LOGICAL_NOT_EX:
+ {
+ using internal::tflite::op::LogicalNot::Param;
+ using internal::tflite::op::LogicalNot::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_RSQRT_EX:
+ {
+ using internal::tflite::op::RSQRT::Param;
+ using internal::tflite::op::RSQRT::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SQRT_EX:
+ {
+ using internal::tflite::op::SQRT::Param;
+ using internal::tflite::op::SQRT::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_EQUAL_EX:
+ {
+ using internal::tflite::op::Equal::Param;
+ using internal::tflite::op::Equal::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SQUARED_DIFFERENCE_EX:
+ {
+ using internal::tflite::op::SquaredDifference::Param;
+ using internal::tflite::op::SquaredDifference::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_TOPK_V2_EX:
+ {
+ using internal::tflite::op::TopKV2::Param;
+ using internal::tflite::op::TopKV2::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_GATHER_EX:
+ {
+ using internal::tflite::op::Gather::Param;
+ using internal::tflite::op::Gather::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_SPLIT_EX:
+ {
+ using internal::tflite::op::Split::Param;
+ using internal::tflite::op::Split::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_UNPACK_EX:
+ {
+ using internal::tflite::op::Unpack::Param;
+ using internal::tflite::op::Unpack::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_NEG_EX:
+ {
+ using internal::tflite::op::Neg::Param;
+ using internal::tflite::op::Neg::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_EXP_EX:
+ {
+ using internal::tflite::op::Exp::Param;
+ using internal::tflite::op::Exp::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_REDUCE_SUM_EX:
+ {
+ using internal::tflite::op::ReduceSum::Param;
+ using internal::tflite::op::ReduceSum::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_PACK_EX:
+ {
+ using internal::tflite::op::Pack::Param;
+ using internal::tflite::op::Pack::Node;
+
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_ABS_EX:
+ {
+ using internal::tflite::op::Abs::Param;
+ using internal::tflite::op::Abs::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_ARGMAX_EX:
+ {
+ using internal::tflite::op::ArgMax::Param;
+ using internal::tflite::op::ArgMax::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+ case ANEURALNETWORKS_NOT_EQUAL_EX:
+ {
+ using internal::tflite::op::NotEqual::Param;
+ using internal::tflite::op::NotEqual::Node;
+
+ // Add 'operations'
+ auto &operations = model->deref().operations();
+
+ operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs});
+
+ break;
+ }
+
+ default:
+ throw std::runtime_error{"Not supported operation"};
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel *model, uint32_t inputCount,
+ const uint32_t *inputs, uint32_t outputCount,
+ const uint32_t *outputs)
+{
+ if ((model == nullptr) || (inputs == nullptr) || (outputs == nullptr))
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ // NOTE ::internal::tflite::operand::Index uses int as its underlying type as various NNAPI
+ // functions such as ANeuralNetworksModel_setOperandValue use int to represent operand index
+ //
+ // ANeuralNetworksModel_identifyInputsAndOutputs, however, uses uint32_t to represent operand
+ // index.
+ //
+ // Below, static_cast<int>(...) is introduced to eliminate compiler warning.
+ for (uint32_t n = 0; n < inputCount; ++n)
+ {
+ const ::internal::tflite::operand::Index ind{static_cast<int>(inputs[n])};
+ model->deref().inputs.emplace_back(ind);
+ }
+
+ for (uint32_t n = 0; n < outputCount; ++n)
+ {
+ const ::internal::tflite::operand::Index ind{static_cast<int>(outputs[n])};
+ model->deref().outputs.emplace_back(ind);
+ }
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ANeuralNetworksModel_finish(ANeuralNetworksModel *model)
+{
+ if (model == nullptr)
+ {
+ return ANEURALNETWORKS_UNEXPECTED_NULL;
+ }
+
+ if (model->isFinished())
+ {
+ return ANEURALNETWORKS_BAD_STATE;
+ }
+
+ model->markAsFinished();
+
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
+//
+// ANeuralNetworksModel
+//
+ANeuralNetworksModel::ANeuralNetworksModel() : _model{new internal::tflite::Model}
+{
+ // DO NOTHING
+}
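The QUANT8_ASYMM branch in ANeuralNetworksModel_addOperand imposes a concrete contract on callers; a sketch with illustrative values:

#include <NeuralNetworks.h>

int add_quant8_operand(ANeuralNetworksModel *model)
{
  uint32_t dims[4] = {1, 224, 224, 3}; // illustrative shape

  ANeuralNetworksOperandType type;
  type.type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
  type.dimensionCount = 4;
  type.dimensions = dims;
  type.scale = 0.0078125f; // must be greater than zero
  type.zeroPoint = 128;    // must lie within [0, 255]

  // Returns ANEURALNETWORKS_BAD_DATA if either constraint is violated
  return ANeuralNetworksModel_addOperand(model, &type);
}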
diff --git a/runtime/contrib/pure_arm_compute/src/model.h b/runtime/contrib/pure_arm_compute/src/model.h
new file mode 100644
index 000000000..8acc894f4
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/src/model.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file model.h
+ * @brief This file contains ANeuralNetworksModel class for handling Model NNAPI such as
+ * ANeuralNetworksModel_create, ANeuralNetworksModel_addOperand
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __MODEL_H__
+#define __MODEL_H__
+
+#include "internal/Model.h"
+
+/**
+ * @brief struct to express Model of NNAPI
+ */
+struct ANeuralNetworksModel
+{
+public:
+ /**
+ * @brief Construct without params
+ */
+ ANeuralNetworksModel();
+
+public:
+ /**
+ * @brief Get reference of internal::tflite::Model
+ * @return Reference of internal::tflite::Model
+ */
+ internal::tflite::Model &deref(void) { return *_model; }
+
+public:
+ /**
+ * @brief Hand the internal::tflite::Model over to the given shared pointer
+ * @param [out] model Shared pointer that receives the internal model
+ * @return N/A
+ */
+ void release(std::shared_ptr<const internal::tflite::Model> &model) { model = _model; }
+ /**
+ * @brief Get @c true if ANeuralNetworksModel_finish has been called, otherwise @c false
+ * @return @c true if ANeuralNetworksModel_finish has been called, otherwise @c false
+ */
+ bool isFinished() { return _isFinished; }
+ /**
+ * @brief Mark model process finished
+ * @return N/A
+ */
+ void markAsFinished() { _isFinished = true; }
+
+private:
+ std::shared_ptr<internal::tflite::Model> _model;
+ bool _isFinished{false};
+};
+
+#endif // __MODEL_H__
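Taken together with model.cc, the intended call order is create, populate, identify inputs and outputs, then finish; the operand indices below are illustrative:

#include <NeuralNetworks.h>

void build_and_finish()
{
  ANeuralNetworksModel *model = nullptr;
  ANeuralNetworksModel_create(&model);

  // ... ANeuralNetworksModel_addOperand / _setOperandValue /
  //     _addOperation calls populate the model here ...

  const uint32_t ins[1] = {0};  // illustrative input operand index
  const uint32_t outs[1] = {1}; // illustrative output operand index
  ANeuralNetworksModel_identifyInputsAndOutputs(model, 1, ins, 1, outs);

  // After this point, addOperand/addOperation return ANEURALNETWORKS_BAD_STATE
  ANeuralNetworksModel_finish(model);

  ANeuralNetworksModel_free(model);
}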
diff --git a/runtime/contrib/pure_arm_compute/symbolcheck.cpp b/runtime/contrib/pure_arm_compute/symbolcheck.cpp
new file mode 100644
index 000000000..d24943e10
--- /dev/null
+++ b/runtime/contrib/pure_arm_compute/symbolcheck.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <NeuralNetworks.h>
+
+#include <iostream>
+
+#define SHOW_FUNC_ENTRY(name) \
+ { \
+ std::cout << #name << " is at " << reinterpret_cast<void *>(name) << std::endl; \
+ }
+
+int main(int argc, char **argv)
+{
+ SHOW_FUNC_ENTRY(ANeuralNetworksMemory_createFromFd);
+ SHOW_FUNC_ENTRY(ANeuralNetworksMemory_free);
+
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_create);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_addOperand);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_setOperandValue);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_setOperandValueFromMemory);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_addOperation);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_identifyInputsAndOutputs);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_finish);
+ SHOW_FUNC_ENTRY(ANeuralNetworksModel_free);
+
+ SHOW_FUNC_ENTRY(ANeuralNetworksCompilation_create);
+ SHOW_FUNC_ENTRY(ANeuralNetworksCompilation_finish);
+ // ANeuralNetworksCompilation_setPreference and ANeuralNetworksCompilation_free
+ // are introduced to reuse NNAPI tests under runtimes/tests. Note that these APIs
+ // are not necessary for supporting the TensorFlow Lite interpreter
+ SHOW_FUNC_ENTRY(ANeuralNetworksCompilation_setPreference);
+ SHOW_FUNC_ENTRY(ANeuralNetworksCompilation_free);
+
+ SHOW_FUNC_ENTRY(ANeuralNetworksExecution_create);
+ SHOW_FUNC_ENTRY(ANeuralNetworksExecution_setInput);
+ SHOW_FUNC_ENTRY(ANeuralNetworksExecution_setOutput);
+ SHOW_FUNC_ENTRY(ANeuralNetworksExecution_startCompute);
+ SHOW_FUNC_ENTRY(ANeuralNetworksExecution_free);
+
+ SHOW_FUNC_ENTRY(ANeuralNetworksEvent_wait);
+ SHOW_FUNC_ENTRY(ANeuralNetworksEvent_free);
+
+ // NOTE Pure CL runtime does not implement the following NN API(s) as
+ // Tensorflow Lite does not use these API(s)
+ // SHOW_FUNC_ENTRY(ANeuralNetworksExecution_setInputFromMemory);
+ // SHOW_FUNC_ENTRY(ANeuralNetworksExecution_setOutputFromMemory);
+
+ return 0;
+}
diff --git a/runtime/contrib/tflite_classify/CMakeLists.txt b/runtime/contrib/tflite_classify/CMakeLists.txt
new file mode 100644
index 000000000..c0bf62738
--- /dev/null
+++ b/runtime/contrib/tflite_classify/CMakeLists.txt
@@ -0,0 +1,22 @@
+if(NOT BUILD_TFLITE_CLASSIFY_APP)
+ return()
+endif(NOT BUILD_TFLITE_CLASSIFY_APP)
+
+list(APPEND SOURCES "src/tflite_classify.cc")
+list(APPEND SOURCES "src/ImageClassifier.cc")
+list(APPEND SOURCES "src/InferenceInterface.cc")
+
+## Required package
+find_package(OpenCV REQUIRED)
+find_package(Boost REQUIRED COMPONENTS system filesystem)
+
+# Without this line, this application cannot find the OpenCV libraries already installed in the ${ROOTFS_DIR}/usr/lib/arm-linux-gnueabihf directory
+set(CMAKE_EXE_LINKER_FLAGS "-Wl,--as-needed -Wl,--rpath=${ROOTFS_DIR}/usr/lib/arm-linux-gnueabihf -Wl,--rpath=${ROOTFS_DIR}/lib/arm-linux-gnueabihf")
+
+add_executable(tflite_classify ${SOURCES})
+target_include_directories(tflite_classify PRIVATE src)
+target_link_libraries(tflite_classify tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(tflite_classify ${Boost_LIBRARIES})
+target_link_libraries(tflite_classify ${OpenCV_LIBRARIES})
+
+install(TARGETS tflite_classify DESTINATION bin)
diff --git a/runtime/contrib/tflite_classify/src/ImageClassifier.cc b/runtime/contrib/tflite_classify/src/ImageClassifier.cc
new file mode 100644
index 000000000..fae4f066c
--- /dev/null
+++ b/runtime/contrib/tflite_classify/src/ImageClassifier.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ImageClassifier.h"
+
+#include <algorithm>
+#include <cassert>
+#include <fstream>
+#include <queue>
+
+ImageClassifier::ImageClassifier(const std::string &model_file, const std::string &label_file,
+ const int input_size, const int image_mean, const int image_std,
+ const std::string &input_name, const std::string &output_name,
+ const bool use_nnapi)
+ : _inference(new InferenceInterface(model_file, use_nnapi)), _input_size(input_size),
+ _image_mean(image_mean), _image_std(image_std), _input_name(input_name),
+ _output_name(output_name)
+{
+ // Load label
+ std::ifstream label_stream(label_file.c_str());
+ assert(label_stream);
+
+ std::string line;
+ while (std::getline(label_stream, line))
+ {
+ _labels.push_back(line);
+ }
+ _num_classes = _inference->getTensorSize(_output_name);
+ std::cout << "Output tensor size is " << _num_classes << ", label size is " << _labels.size()
+ << std::endl;
+
+ // Pre-allocate buffers
+ _fdata.reserve(_input_size * _input_size * 3);
+ _outputs.reserve(_num_classes);
+}
+
+std::vector<Recognition> ImageClassifier::recognizeImage(const cv::Mat &image)
+{
+ // Resize image
+ cv::Mat cropped;
+ cv::resize(image, cropped, cv::Size(_input_size, _input_size), 0, 0, cv::INTER_AREA);
+
+ // Preprocess the image data from 0~255 int to normalized float based
+ // on the provided parameters
+ _fdata.clear();
+ for (int y = 0; y < cropped.rows; ++y)
+ {
+ for (int x = 0; x < cropped.cols; ++x)
+ {
+ cv::Vec3b color = cropped.at<cv::Vec3b>(y, x);
+
+ // Normalize each channel as (value - mean) / std
+ _fdata.push_back((color[0] - (float)_image_mean) / _image_std);
+ _fdata.push_back((color[1] - (float)_image_mean) / _image_std);
+ _fdata.push_back((color[2] - (float)_image_mean) / _image_std);
+ }
+ }
+
+ // Copy the input data into model
+ _inference->feed(_input_name, _fdata, 1, _input_size, _input_size, 3);
+
+ // Run the inference call
+ _inference->run(_output_name);
+
+ // Copy the output tensor back into the output array
+ _inference->fetch(_output_name, _outputs);
+
+ // Find the best classifications
+ auto compare = [](const Recognition &lhs, const Recognition &rhs) {
+ return lhs.confidence < rhs.confidence;
+ };
+
+ std::priority_queue<Recognition, std::vector<Recognition>, decltype(compare)> pq(compare);
+ for (int i = 0; i < _num_classes; ++i)
+ {
+ if (_outputs[i] > _threshold)
+ {
+ pq.push(Recognition(_outputs[i], _labels[i]));
+ }
+ }
+
+ std::vector<Recognition> results;
+ const int min = std::min<size_t>(pq.size(), _max_results);
+ for (int i = 0; i < min; ++i)
+ {
+ results.push_back(pq.top());
+ pq.pop();
+ }
+
+ return results;
+}
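A minimal driver might look like the sketch below; every path and parameter is a placeholder (224/128/128 echo common MobileNet preprocessing, not values mandated by this class):

#include "ImageClassifier.h"

int main()
{
  // All file names and parameters below are placeholders
  ImageClassifier classifier("mobilenet_v1_1.0_224.tflite", "labels.txt",
                             224 /*input_size*/, 128 /*image_mean*/,
                             128 /*image_std*/, "input", "output",
                             false /*use_nnapi*/);

  cv::Mat image = cv::imread("sample.bmp");
  for (const auto &r : classifier.recognizeImage(image))
  {
    std::cout << r.title << " (" << r.confidence << ")" << std::endl;
  }

  return 0;
}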
diff --git a/runtime/contrib/tflite_classify/src/ImageClassifier.h b/runtime/contrib/tflite_classify/src/ImageClassifier.h
new file mode 100644
index 000000000..1ba19afb0
--- /dev/null
+++ b/runtime/contrib/tflite_classify/src/ImageClassifier.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file ImageClassifier.h
+ * @brief This file contains ImageClassifier class and Recognition structure
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __TFLITE_CLASSIFY_IMAGE_CLASSIFIER_H__
+#define __TFLITE_CLASSIFY_IMAGE_CLASSIFIER_H__
+
+#include "InferenceInterface.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <opencv2/opencv.hpp>
+
+/**
+ * @brief struct to define an immutable result returned by a Classifier
+ */
+struct Recognition
+{
+public:
+ /**
+ * @brief Construct a new Recognition object with confidence and title
+ * @param[in] _confidence A sortable score for how good the recognition is relative to others.
+ * Higher should be better.
+ * @param[in] _title Display name for the recognition
+ */
+ Recognition(float _confidence, std::string _title) : confidence(_confidence), title(_title) {}
+
+ float confidence; /**< A sortable score for how good the recognition is relative to others.
+ Higher should be better. */
+ std::string title; /**< Display name for the recognition */
+};
+
+/**
+ * @brief Class to define a classifier specialized to label images
+ */
+class ImageClassifier
+{
+public:
+ /**
+ * @brief Construct a new ImageClassifier object with parameters
+ * @param[in] model_file The filepath of the model FlatBuffer protocol buffer
+ * @param[in] label_file The filepath of label file for classes
+ * @param[in] input_size The input size. A square image of input_size x input_size is assumed
+ * @param[in] image_mean The assumed mean of the image values
+ * @param[in] image_std The assumed std of the image values
+ * @param[in] input_name The label of the image input node
+ * @param[in] output_name The label of the output node
+ * @param[in] use_nnapi The flag to distinguish between TfLite interpreter and NNFW runtime
+ */
+ ImageClassifier(const std::string &model_file, const std::string &label_file,
+ const int input_size, const int image_mean, const int image_std,
+ const std::string &input_name, const std::string &output_name,
+ const bool use_nnapi);
+
+ /**
+ * @brief Recognize the given image data
+ * @param[in] image The image data to recognize
+ * @return An immutable result vector array
+ */
+ std::vector<Recognition> recognizeImage(const cv::Mat &image);
+
+private:
+ const float _threshold = 0.1f;
+ const unsigned int _max_results = 3;
+
+ std::unique_ptr<InferenceInterface> _inference;
+ int _input_size;
+ int _image_mean;
+ int _image_std;
+ std::string _input_name;
+ std::string _output_name;
+
+ std::vector<std::string> _labels;
+ std::vector<float> _fdata;
+ std::vector<float> _outputs;
+ int _num_classes;
+};
+
+#endif // __TFLITE_CLASSIFY_IMAGE_CLASSIFIER_H__
diff --git a/runtime/contrib/tflite_classify/src/InferenceInterface.cc b/runtime/contrib/tflite_classify/src/InferenceInterface.cc
new file mode 100644
index 000000000..160943477
--- /dev/null
+++ b/runtime/contrib/tflite_classify/src/InferenceInterface.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InferenceInterface.h"
+
+using namespace tflite;
+using namespace tflite::ops::builtin;
+
+InferenceInterface::InferenceInterface(const std::string &model_file, const bool use_nnapi)
+ : _interpreter(nullptr), _model(nullptr), _sess(nullptr)
+{
+ // Load model
+ StderrReporter error_reporter;
+ _model = FlatBufferModel::BuildFromFile(model_file.c_str(), &error_reporter);
+ BuiltinOpResolver resolver;
+ InterpreterBuilder builder(*_model, resolver);
+ builder(&_interpreter);
+
+ if (use_nnapi)
+ {
+ _sess = std::make_shared<nnfw::tflite::NNAPISession>(_interpreter.get());
+ }
+ else
+ {
+ _sess = std::make_shared<nnfw::tflite::InterpreterSession>(_interpreter.get());
+ }
+
+ _sess->prepare();
+}
+
+InferenceInterface::~InferenceInterface() { _sess->teardown(); }
+
+void InferenceInterface::feed(const std::string &input_name, const std::vector<float> &data,
+ const int batch, const int height, const int width, const int channel)
+{
+ // Set input tensor
+ for (const auto &id : _interpreter->inputs())
+ {
+ if (_interpreter->tensor(id)->name == input_name)
+ {
+ assert(_interpreter->tensor(id)->type == kTfLiteFloat32);
+ float *p = _interpreter->tensor(id)->data.f;
+
+ // TODO consider batch
+ for (int y = 0; y < height; ++y)
+ {
+ for (int x = 0; x < width; ++x)
+ {
+ for (int c = 0; c < channel; ++c)
+ {
+ *p++ = data[y * width * channel + x * channel + c];
+ }
+ }
+ }
+ }
+ }
+}
+
+void InferenceInterface::run(const std::string &output_name)
+{
+  // Run the whole model (output_name is accepted for interface symmetry but unused)
+ _sess->run();
+}
+
+void InferenceInterface::fetch(const std::string &output_name, std::vector<float> &outputs)
+{
+ // Get output tensor
+ for (const auto &id : _interpreter->outputs())
+ {
+ if (_interpreter->tensor(id)->name == output_name)
+ {
+ assert(_interpreter->tensor(id)->type == kTfLiteFloat32);
+      const int size = getTensorSize(output_name);
+      assert(size == static_cast<int>(outputs.capacity()));
+      const float *p = _interpreter->tensor(id)->data.f;
+
+      outputs.clear();
+      for (int i = 0; i < size; ++i)
+ {
+ outputs.push_back(p[i]);
+ }
+ }
+ }
+}
+
+int InferenceInterface::getTensorSize(const std::string &name)
+{
+ for (const auto &id : _interpreter->outputs())
+ {
+ if (_interpreter->tensor(id)->name == name)
+ {
+ TfLiteTensor *t = _interpreter->tensor(id);
+ int v = 1;
+ for (int i = 0; i < t->dims->size; ++i)
+ {
+ v *= t->dims->data[i];
+ }
+ return v;
+ }
+ }
+ return -1;
+}
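The class above is meant to be driven as feed, then run, then fetch. A minimal sketch, assuming a float input tensor named "input" with shape 1x224x224x3 and an output tensor named "output" (both names and sizes are illustrative):

// Hypothetical round trip through InferenceInterface
#include "InferenceInterface.h"
#include <vector>

void example()
{
  InferenceInterface inference("model.tflite", false /* use_nnapi */);

  std::vector<float> input(1 * 224 * 224 * 3, 0.0f); // NHWC order, as feed() iterates
  inference.feed("input", input, 1, 224, 224, 3);

  inference.run("output");

  // fetch() asserts that the vector's capacity equals the tensor size,
  // so reserve exactly that many elements before fetching.
  const int out_size = inference.getTensorSize("output");
  std::vector<float> output;
  output.reserve(out_size);
  inference.fetch("output", output);
}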
diff --git a/runtime/contrib/tflite_classify/src/InferenceInterface.h b/runtime/contrib/tflite_classify/src/InferenceInterface.h
new file mode 100644
index 000000000..fe2c1aa6c
--- /dev/null
+++ b/runtime/contrib/tflite_classify/src/InferenceInterface.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file InferenceInterface.h
+ * @brief This file contains class for running the actual inference model
+ * @ingroup COM_AI_RUNTIME
+ */
+
+#ifndef __TFLITE_CLASSIFY_INFERENCE_INTERFACE_H__
+#define __TFLITE_CLASSIFY_INFERENCE_INTERFACE_H__
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/lite/model.h"
+
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+
+#include <iostream>
+#include <string>
+
+/**
+ * @brief Class to define an inference interface for recognizing data
+ */
+class InferenceInterface
+{
+public:
+ /**
+ * @brief Construct a new InferenceInterface object with parameters
+ * @param[in] model_file The filepath of the model FlatBuffer protocol buffer
+ * @param[in] use_nnapi The flag to distinguish between TfLite interpreter and NNFW runtime
+ */
+ InferenceInterface(const std::string &model_file, const bool use_nnapi);
+
+ /**
+   * @brief Destroy the InferenceInterface object, tearing down the session
+ */
+ ~InferenceInterface();
+
+ /**
+ * @brief Copy the input data into model
+ * @param[in] input_name The label of the image input node
+ * @param[in] data The actual data to be copied into input tensor
+   * @param[in] batch The batch size
+   * @param[in] height The height of the input image
+   * @param[in] width The width of the input image
+   * @param[in] channel The number of channels in the input image
+ * @return N/A
+ */
+ void feed(const std::string &input_name, const std::vector<float> &data, const int batch,
+ const int height, const int width, const int channel);
+ /**
+ * @brief Run the inference call
+ * @param[in] output_name The label of the output node
+ * @return N/A
+ */
+ void run(const std::string &output_name);
+
+ /**
+ * @brief Copy the output tensor back into the output array
+   * @param[in] output_name The label of the output node
+   * @param[out] outputs The array to copy the output data into
+ * @return N/A
+ */
+ void fetch(const std::string &output_name, std::vector<float> &outputs);
+
+ /**
+   * @brief Get the number of elements in the named output tensor
+   * @param[in] name The label of the output node
+   * @return The number of elements in the tensor, or -1 if no such output node exists
+ */
+ int getTensorSize(const std::string &name);
+
+private:
+ std::unique_ptr<tflite::Interpreter> _interpreter;
+ std::unique_ptr<tflite::FlatBufferModel> _model;
+ std::shared_ptr<nnfw::tflite::Session> _sess;
+};
+
+#endif // __TFLITE_CLASSIFY_INFERENCE_INTERFACE_H__
diff --git a/runtime/contrib/tflite_classify/src/tflite_classify.cc b/runtime/contrib/tflite_classify/src/tflite_classify.cc
new file mode 100644
index 000000000..40c15f331
--- /dev/null
+++ b/runtime/contrib/tflite_classify/src/tflite_classify.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ImageClassifier.h"
+
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+#include <opencv2/opencv.hpp>
+
+namespace fs = boost::filesystem;
+
+int main(const int argc, char **argv)
+{
+ const std::string MODEL_FILE = "tensorflow_inception_graph.tflite";
+ const std::string LABEL_FILE = "imagenet_comp_graph_label_strings.txt";
+
+ const std::string INPUT_NAME = "input";
+ const std::string OUTPUT_NAME = "output";
+ const int INPUT_SIZE = 224;
+ const int IMAGE_MEAN = 117;
+ const int IMAGE_STD = 1;
+ const int OUTPUT_SIZE = 1008;
+
+ const int FRAME_WIDTH = 640;
+ const int FRAME_HEIGHT = 480;
+
+ bool use_nnapi = false;
+ bool debug_mode = false;
+
+ if (std::getenv("USE_NNAPI") != nullptr)
+ {
+ use_nnapi = true;
+ }
+
+ if (std::getenv("DEBUG_MODE") != nullptr)
+ {
+ debug_mode = true;
+ }
+
+ std::cout << "USE_NNAPI : " << use_nnapi << std::endl;
+ std::cout << "DEBUG_MODE : " << debug_mode << std::endl;
+
+ std::cout << "Model : " << MODEL_FILE << std::endl;
+ std::cout << "Label : " << LABEL_FILE << std::endl;
+
+ if (!fs::exists(MODEL_FILE))
+ {
+ std::cerr << "model file not found: " << MODEL_FILE << std::endl;
+ exit(1);
+ }
+
+ if (!fs::exists(LABEL_FILE))
+ {
+ std::cerr << "label file not found: " << LABEL_FILE << std::endl;
+ exit(1);
+ }
+
+ // Create ImageClassifier
+ std::unique_ptr<ImageClassifier> classifier(
+ new ImageClassifier(MODEL_FILE, LABEL_FILE, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, INPUT_NAME,
+ OUTPUT_NAME, use_nnapi));
+
+ // Cam setting
+ cv::VideoCapture cap(0);
+ cv::Mat frame;
+
+ // Initialize camera
+ cap.set(CV_CAP_PROP_FRAME_WIDTH, FRAME_WIDTH);
+ cap.set(CV_CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT);
+ cap.set(CV_CAP_PROP_FPS, 5);
+
+ std::vector<Recognition> results;
+ clock_t begin, end;
+ while (cap.isOpened())
+ {
+ // Get image data
+ if (!cap.read(frame))
+ {
+ std::cout << "Frame is null..." << std::endl;
+ break;
+ }
+
+ if (debug_mode)
+ {
+ begin = clock();
+ }
+ // Recognize image
+ results = classifier->recognizeImage(frame);
+ if (debug_mode)
+ {
+ end = clock();
+ }
+
+ // Show result data
+ std::cout << std::endl;
+    if (!results.empty())
+    {
+      for (size_t i = 0; i < results.size(); ++i)
+ {
+ std::cout << results[i].title << ": " << results[i].confidence << std::endl;
+ }
+ }
+ else
+ {
+ std::cout << "." << std::endl;
+ }
+ if (debug_mode)
+ {
+ std::cout << "Frame: " << FRAME_WIDTH << "x" << FRAME_HEIGHT << std::endl;
+ std::cout << "Crop: " << INPUT_SIZE << "x" << INPUT_SIZE << std::endl;
+ std::cout << "Inference time(ms): " << ((end - begin) / (CLOCKS_PER_SEC / 1000)) << std::endl;
+ }
+ }
+
+ cap.release();
+
+ return 0;
+}
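One caveat on the timing above: clock() measures CPU time, not elapsed wall time, so "Inference time(ms)" can diverge from real latency once the backend uses multiple threads. A wall-clock alternative, as a sketch:

// Wall-clock timing helper with <chrono> (sketch, not part of the diff)
#include <chrono>

template <typename F> long long elapsed_ms(F &&f)
{
  const auto begin = std::chrono::steady_clock::now();
  f();
  const auto end = std::chrono::steady_clock::now();
  return std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
}

// e.g. const auto ms = elapsed_ms([&] { results = classifier->recognizeImage(frame); });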
diff --git a/runtime/contrib/tflite_test/CMakeLists.txt b/runtime/contrib/tflite_test/CMakeLists.txt
new file mode 100644
index 000000000..760952a84
--- /dev/null
+++ b/runtime/contrib/tflite_test/CMakeLists.txt
@@ -0,0 +1,16 @@
+if(NOT BUILD_TFLITE_TEST)
+ return()
+endif(NOT BUILD_TFLITE_TEST)
+
+nnfw_find_package(Tensorflow QUIET)
+
+if(NOT Tensorflow_FOUND)
+ return()
+endif(NOT Tensorflow_FOUND)
+
+list(APPEND SOURCES tflite_test.cpp)
+
+add_executable(tflite_test ${SOURCES})
+target_link_libraries(tflite_test PRIVATE nnfw_lib_tflite)
+target_link_libraries(tflite_test PRIVATE tensorflow-lite)
+target_link_libraries(tflite_test PRIVATE tensorflow-core)
diff --git a/runtime/contrib/tflite_test/tflite_test.cpp b/runtime/contrib/tflite_test/tflite_test.cpp
new file mode 100644
index 000000000..80ba448c6
--- /dev/null
+++ b/runtime/contrib/tflite_test/tflite_test.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tensorflow/lite/model.h>
+#include <tensorflow/core/public/session.h>
+
+#include "tflite/Assert.h"
+#include "tflite/Session.h"
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+#include "tflite/ext/kernels/register.h"
+
+#include "misc/fp32.h"
+
+#include <iostream>
+
+#include <string>
+#include <vector>
+
+#define TF_ENSURE(e) \
+ { \
+ if (!(e).ok()) \
+ { \
+ throw std::runtime_error{"'" #e "' FAILED"}; \
+ } \
+ }
+
+using namespace tflite;
+using namespace tflite::ops::builtin;
+
+std::unique_ptr<FlatBufferModel> BuildModelFromFile(const std::string &path)
+{
+ static StderrReporter reporter;
+ return FlatBufferModel::BuildFromFile(path.c_str(), &reporter);
+}
+
+std::unique_ptr<Interpreter> BuildInterpFromModel(const std::unique_ptr<FlatBufferModel> &model)
+{
+ std::unique_ptr<Interpreter> interp;
+
+ BuiltinOpResolver resolver;
+ InterpreterBuilder builder(*model, resolver);
+
+ TFLITE_ENSURE(builder(&interp));
+
+  return interp; // implicit move of the local unique_ptr
+}
+
+tensorflow::TensorShape asTensorflowShape(const TfLiteTensor *tensor)
+{
+ tensorflow::TensorShape shape;
+
+ const int rank = tensor->dims->size;
+
+ for (int axis = 0; axis < rank; ++axis)
+ {
+ shape.AddDim(tensor->dims->data[axis]);
+ }
+
+ return shape;
+}
+
+uint32_t count_elements(const TfLiteTensor *tensor)
+{
+ const int rank = tensor->dims->size;
+
+ if (rank == 0)
+ {
+ return 0;
+ }
+
+ uint32_t res = 1;
+
+ for (int axis = 0; axis < rank; ++axis)
+ {
+ res *= tensor->dims->data[axis];
+ }
+
+ return res;
+}
+
+int main(int argc, char **argv)
+{
+ bool use_nnapi = false;
+
+ if (std::getenv("USE_NNAPI") != nullptr)
+ {
+ use_nnapi = true;
+ }
+
+ if (argc < 3)
+ {
+ std::cerr << "USAGE: " << argv[0] << " [T/F lite model] [T/F model]" << std::endl;
+ return 255;
+ }
+
+ //
+ // Prepare Tensorflow Lite session
+ //
+ const std::string lite_model_path{argv[1]};
+
+ auto lite_model = BuildModelFromFile(lite_model_path);
+ auto lite_interp = BuildInterpFromModel(lite_model);
+
+ std::shared_ptr<nnfw::tflite::Session> lite_sess;
+
+ if (use_nnapi)
+ {
+ lite_sess = std::make_shared<nnfw::tflite::NNAPISession>(lite_interp.get());
+ }
+ else
+ {
+ lite_sess = std::make_shared<nnfw::tflite::InterpreterSession>(lite_interp.get());
+ }
+
+ //
+ // Prepare Tensorflow session
+ //
+ const std::string full_model_path{argv[2]};
+
+ tensorflow::Session *full_sess;
+ tensorflow::GraphDef full_model;
+
+ TF_ENSURE(tensorflow::NewSession(tensorflow::SessionOptions(), &full_sess));
+ TF_ENSURE(ReadBinaryProto(tensorflow::Env::Default(), full_model_path, &full_model));
+ TF_ENSURE(full_sess->Create(full_model));
+
+  //
+  // Prepare input/output node descriptions
+  //
+ std::vector<tensorflow::Tensor> input_nodes;
+ std::vector<std::string> input_names;
+
+ for (uint32_t n = 0; n < lite_interp->inputs().size(); ++n)
+ {
+ const TfLiteTensor *tensor = lite_interp->tensor(lite_interp->inputs().at(n));
+
+ input_nodes.emplace_back(tensorflow::DT_FLOAT, asTensorflowShape(tensor));
+ input_names.emplace_back(tensor->name);
+ }
+
+ assert(input_nodes.size() == input_names.size());
+ assert(input_nodes.size() == lite_interp->inputs().size());
+
+ std::vector<std::string> output_names;
+ std::vector<tensorflow::Tensor> output_nodes;
+
+ for (uint32_t n = 0; n < lite_interp->outputs().size(); ++n)
+ {
+ const TfLiteTensor *tensor = lite_interp->tensor(lite_interp->outputs().at(n));
+
+ output_names.emplace_back(tensor->name);
+ }
+
+ assert(output_names.size() == lite_interp->outputs().size());
+ // output_nodes will be initialized after Tensorflow Session run
+ assert(output_nodes.size() == 0);
+
+ //
+ // Prepare inference
+ //
+ lite_sess->prepare();
+
+ // TODO Feed Inputs (for both Tensorflow and Tensorflow Lite)
+ std::vector<std::pair<std::string, tensorflow::Tensor>> inputs;
+
+ for (uint32_t n = 0; n < input_nodes.size(); ++n)
+ {
+    inputs.emplace_back(input_names.at(n), input_nodes.at(n));
+ }
+
+ //
+ // Run inference
+ //
+ TF_ENSURE(full_sess->Run(inputs, output_names, {}, &output_nodes));
+
+ lite_sess->run();
+
+ //
+ // Compare Output
+ //
+ auto equals = [](float lhs, float rhs) {
+ // TODO Allow users to set tolerance
+ if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
+ {
+ return true;
+ }
+
+ return nnfw::misc::fp32::epsilon_equal(lhs, rhs);
+ };
+
+ const uint32_t output_count = output_names.size();
+
+ bool matched = true;
+
+ for (uint32_t n = 0; n < output_count; ++n)
+ {
+ const TfLiteTensor *tensor = lite_interp->tensor(lite_interp->outputs().at(n));
+
+ // TODO Compare shape
+
+ const auto element_count = count_elements(tensor);
+
+ std::cout << "Compare output #" << n << "(" << tensor->name << ", " << element_count
+ << " elements)" << std::endl;
+ for (uint32_t index = 0; index < element_count; ++index)
+ {
+ const auto full_value = output_nodes.at(n).flat<float>().data()[index];
+ const auto lite_value = lite_sess->interp()->typed_output_tensor<float>(n)[index];
+
+ if (!equals(full_value, lite_value))
+ {
+        std::cerr << full_value << " is expected, but " << lite_value << " is obtained (at " << n
+ << ":" << index << ")" << std::endl;
+ matched = false;
+ }
+ }
+ }
+
+ //
+ // Cleanup
+ //
+ lite_sess->teardown();
+
+ return matched ? 0 : 255;
+}
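The equals lambda above delegates to nnfw::misc::fp32 helpers defined elsewhere in the tree. In spirit, combining an absolute and a relative epsilon check looks like the following sketch (the tolerance choice is an assumption, not the helpers' actual value):

// Illustrative float comparison; the real helpers live in misc/fp32.h
#include <algorithm>
#include <cmath>
#include <limits>

bool almost_equal(float lhs, float rhs)
{
  const float eps = std::numeric_limits<float>::epsilon();
  // An absolute tolerance catches differences near zero...
  if (std::fabs(lhs - rhs) <= eps)
    return true;
  // ...while a relative tolerance scales with the operands' magnitude.
  return std::fabs(lhs - rhs) <= eps * std::max(std::fabs(lhs), std::fabs(rhs));
}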
diff --git a/runtime/contrib/uben/CMakeLists.txt b/runtime/contrib/uben/CMakeLists.txt
new file mode 100644
index 000000000..59cd52d16
--- /dev/null
+++ b/runtime/contrib/uben/CMakeLists.txt
@@ -0,0 +1,29 @@
+if(NOT BUILD_UBEN)
+ return()
+endif(NOT BUILD_UBEN)
+
+nnas_find_package(ARMCompute QUIET)
+nnas_find_package(Nonius QUIET)
+
+if(NOT ARMCompute_FOUND)
+ return()
+endif(NOT ARMCompute_FOUND)
+
+if(NOT Nonius_FOUND)
+ return()
+endif(NOT Nonius_FOUND)
+
+# 3x3 Convolution with unit stride
+add_executable(uben_conv_3x3 Convolution.cpp)
+target_compile_definitions(uben_conv_3x3 PRIVATE KER_H=3 KER_W=3 STRIDE_H=1 STRIDE_W=1)
+target_compile_definitions(uben_conv_3x3 PRIVATE CL_DIRECT_CONVOLUTION=1)
+target_compile_definitions(uben_conv_3x3 PRIVATE CL_GEMM_CONVOLUTION=1)
+target_compile_definitions(uben_conv_3x3 PRIVATE CL_WINOGRAD_CONVOLUTION=1)
+target_link_libraries(uben_conv_3x3 PRIVATE nonius)
+target_link_libraries(uben_conv_3x3 PRIVATE arm_compute)
+target_link_libraries(uben_conv_3x3 PRIVATE pthread)
+
+add_executable(uben_softmax Softmax.cpp)
+target_link_libraries(uben_softmax PRIVATE nonius)
+target_link_libraries(uben_softmax PRIVATE nnfw_lib_cker)
+target_link_libraries(uben_softmax PRIVATE pthread)
diff --git a/runtime/contrib/uben/Convolution.cpp b/runtime/contrib/uben/Convolution.cpp
new file mode 100644
index 000000000..ad69f1cec
--- /dev/null
+++ b/runtime/contrib/uben/Convolution.cpp
@@ -0,0 +1,429 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Conv2D (with SAME padding) benchmark with various algorithms
+ */
+
+#ifndef KER_H
+#error "KER_H is undefined"
+#endif // KER_H
+#ifndef KER_W
+#error "KER_W is undefined"
+#endif // KER_W
+#ifndef STRIDE_H
+#error "STRIDE_H is undefined"
+#endif // STRIDE_H
+#ifndef STRIDE_W
+#error "STRIDE_W is undefined"
+#endif // STRIDE_W
+
+#define NONIUS_RUNNER
+#include <nonius/nonius_single.h++>
+
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+#include <cstdint>
+#include <cassert>
+#include <stdexcept>
+
+using namespace arm_compute;
+
+//
+// Helpers
+//
+namespace
+{
+
+enum Layout
+{
+ NCHW,
+ NHWC
+};
+
+struct Initializer
+{
+ Initializer() { CLScheduler::get().default_init(); }
+};
+
+Initializer initializer;
+
+TensorInfo make_info(uint32_t N)
+{
+ TensorShape shape{N};
+ return TensorInfo{shape, 1, DataType::F32};
+}
+
+template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
+
+template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+{
+ TensorShape shape{W, H, C, N};
+ TensorInfo info{shape, 1, DataType::F32};
+ info.set_data_layout(DataLayout::NCHW);
+ return info;
+}
+
+template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+{
+ TensorShape shape{C, W, H, N};
+ TensorInfo info{shape, 1, DataType::F32};
+ info.set_data_layout(DataLayout::NHWC);
+ return info;
+}
+
+inline void check(const Status &status)
+{
+ if (!status)
+ {
+ std::cerr << status.error_description() << std::endl;
+ throw std::runtime_error{"ERROR"};
+ }
+}
+
+inline bool is_odd(uint32_t n) { return n % 2 != 0; }
+
+} // namespace
+
+//
+// Benchmark Parameters
+//
+NONIUS_PARAM(BATCH, 1);
+
+NONIUS_PARAM(IFM_C, 3);
+NONIUS_PARAM(IFM_H, 244);
+NONIUS_PARAM(IFM_W, 244);
+
+NONIUS_PARAM(OFM_C, 3);
+NONIUS_PARAM(OFM_H, 244);
+NONIUS_PARAM(OFM_W, 244);
+
+//
+// Configuration Helpers
+//
+namespace
+{
+
+struct Configuration
+{
+ uint32_t ifm_N;
+ uint32_t ifm_C;
+ uint32_t ifm_H;
+ uint32_t ifm_W;
+
+ uint32_t ofm_N;
+ uint32_t ofm_C;
+ uint32_t ofm_H;
+ uint32_t ofm_W;
+
+ uint32_t ker_N;
+ uint32_t ker_C;
+ uint32_t ker_H;
+ uint32_t ker_W;
+
+ uint32_t vertical_stride;
+ uint32_t horizontal_stride;
+
+ uint32_t top_padding;
+ uint32_t bottom_padding;
+ uint32_t left_padding;
+ uint32_t right_padding;
+
+ Configuration(nonius::chronometer meter)
+ {
+ ifm_N = meter.param<BATCH>();
+ ifm_C = meter.param<IFM_C>();
+ ifm_H = meter.param<IFM_H>();
+ ifm_W = meter.param<IFM_W>();
+
+ ofm_N = meter.param<BATCH>();
+ ofm_C = meter.param<OFM_C>();
+ ofm_H = meter.param<OFM_H>();
+ ofm_W = meter.param<OFM_W>();
+
+ ker_N = meter.param<OFM_C>();
+ ker_C = meter.param<IFM_C>();
+ ker_H = KER_H;
+ ker_W = KER_W;
+
+ vertical_stride = STRIDE_H;
+ horizontal_stride = STRIDE_W;
+
+ assert((ifm_H - ker_H) % vertical_stride == 0);
+    assert((ifm_W - ker_W) % horizontal_stride == 0);
+
+    uint32_t const effective_ofm_H = (ifm_H - ker_H) / vertical_stride + 1;
+    uint32_t const effective_ofm_W = (ifm_W - ker_W) / horizontal_stride + 1;
+
+ assert(ofm_H >= effective_ofm_H);
+ assert(ofm_W >= effective_ofm_W);
+
+ uint32_t const pad_H = ofm_H - effective_ofm_H;
+ uint32_t const pad_W = ofm_W - effective_ofm_W;
+
+ top_padding = pad_H / 2;
+ bottom_padding = pad_H / 2;
+ left_padding = pad_W / 2;
+ right_padding = pad_W / 2;
+
+ if (is_odd(pad_H))
+ top_padding += 1;
+ if (is_odd(pad_W))
+ left_padding += 1;
+ }
+
+ template <Layout L> TensorInfo src_info() const
+ {
+ return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
+ }
+ template <Layout L> TensorInfo dst_info() const
+ {
+ return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
+ }
+ template <Layout L> TensorInfo ker_info() const
+ {
+ return make_info<L>(ker_N, ker_C, ker_H, ker_W);
+ }
+ TensorInfo bias_info(void) const { return make_info(ker_N); }
+
+ PadStrideInfo pad_stride_info(void) const
+ {
+ return PadStrideInfo{horizontal_stride,
+ vertical_stride,
+ left_padding,
+ right_padding,
+ top_padding,
+ bottom_padding,
+ DimensionRoundingType::FLOOR};
+ }
+};
+
+} // namespace
+
+//
+// Benchmark Implementations
+//
+#ifndef CL_DIRECT_CONVOLUTION
+#error "CL_DIRECT_CONVOLUTION is undefined"
+#endif // CL_DIRECT_CONVOLUTION
+
+#if CL_DIRECT_CONVOLUTION
+NONIUS_BENCHMARK("CLDirectConvolutionLayer(NCHW)", [](nonius::chronometer meter) {
+ CLDirectConvolutionLayer conv;
+
+ // Configure
+ Configuration p{meter};
+
+ CLTensor src_tensor{};
+ CLTensor dst_tensor{};
+ CLTensor ker_tensor{};
+ CLTensor bias_tensor{};
+
+ src_tensor.allocator()->init(p.src_info<NCHW>());
+ dst_tensor.allocator()->init(p.dst_info<NCHW>());
+ ker_tensor.allocator()->init(p.ker_info<NCHW>());
+ bias_tensor.allocator()->init(p.bias_info());
+
+ check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+ p.pad_stride_info()));
+ conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
+
+ src_tensor.allocator()->allocate();
+ ker_tensor.allocator()->allocate();
+ bias_tensor.allocator()->allocate();
+ dst_tensor.allocator()->allocate();
+
+ // Run!
+ meter.measure([&](int) {
+ conv.run();
+ CLScheduler::get().sync();
+ });
+})
+
+NONIUS_BENCHMARK("CLDirectConvolutionLayer(NHWC)", [](nonius::chronometer meter) {
+ CLDirectConvolutionLayer conv;
+
+ // Configure
+ Configuration p{meter};
+
+ CLTensor src_tensor{};
+ CLTensor dst_tensor{};
+ CLTensor ker_tensor{};
+ CLTensor bias_tensor{};
+
+ src_tensor.allocator()->init(p.src_info<NHWC>());
+ dst_tensor.allocator()->init(p.dst_info<NHWC>());
+ ker_tensor.allocator()->init(p.ker_info<NHWC>());
+ bias_tensor.allocator()->init(p.bias_info());
+
+ check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+ p.pad_stride_info()));
+ conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
+
+ src_tensor.allocator()->allocate();
+ ker_tensor.allocator()->allocate();
+ bias_tensor.allocator()->allocate();
+ dst_tensor.allocator()->allocate();
+
+ // Run!
+ meter.measure([&](int) {
+ conv.run();
+ CLScheduler::get().sync();
+ });
+})
+#endif // CL_DIRECT_CONVOLUTION
+
+#ifndef CL_GEMM_CONVOLUTION
+#error "CL_GEMM_CONVOLUTION is undefined"
+#endif // CL_GEMM_CONVOLUTION
+
+#if CL_GEMM_CONVOLUTION
+NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NCHW)", [](nonius::chronometer meter) {
+ CLGEMMConvolutionLayer conv;
+
+ // Configure
+ Configuration p{meter};
+
+ CLTensor src_tensor{};
+ CLTensor dst_tensor{};
+ CLTensor ker_tensor{};
+ CLTensor bias_tensor{};
+
+ src_tensor.allocator()->init(p.src_info<NCHW>());
+ dst_tensor.allocator()->init(p.dst_info<NCHW>());
+ ker_tensor.allocator()->init(p.ker_info<NCHW>());
+ bias_tensor.allocator()->init(p.bias_info());
+
+ check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+ p.pad_stride_info()));
+ conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
+
+ src_tensor.allocator()->allocate();
+ ker_tensor.allocator()->allocate();
+ bias_tensor.allocator()->allocate();
+ dst_tensor.allocator()->allocate();
+
+ // Run
+ meter.measure([&](int) {
+ conv.run();
+ CLScheduler::get().sync();
+ });
+})
+
+NONIUS_BENCHMARK("CLGEMMConvolutionLayer(NHWC)", [](nonius::chronometer meter) {
+ CLGEMMConvolutionLayer conv;
+
+ // Configure
+ Configuration p{meter};
+
+ CLTensor src_tensor{};
+ CLTensor dst_tensor{};
+ CLTensor ker_tensor{};
+ CLTensor bias_tensor{};
+
+ src_tensor.allocator()->init(p.src_info<NHWC>());
+ dst_tensor.allocator()->init(p.dst_info<NHWC>());
+ ker_tensor.allocator()->init(p.ker_info<NHWC>());
+ bias_tensor.allocator()->init(p.bias_info());
+
+ check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+ p.pad_stride_info()));
+ conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
+
+ src_tensor.allocator()->allocate();
+ ker_tensor.allocator()->allocate();
+ bias_tensor.allocator()->allocate();
+ dst_tensor.allocator()->allocate();
+
+ // Run
+ meter.measure([&](int) {
+ conv.run();
+ CLScheduler::get().sync();
+ });
+})
+#endif // CL_GEMM_CONVOLUTION
+
+#ifndef CL_WINOGRAD_CONVOLUTION
+#error "CL_WINOGRAD_CONVOLUTION is undefined"
+#endif // CL_WINOGRAD_CONVOLUTION
+
+#if CL_WINOGRAD_CONVOLUTION
+NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NCHW)", [](nonius::chronometer meter) {
+ CLWinogradConvolutionLayer conv;
+
+ // Configure
+ Configuration p{meter};
+
+ CLTensor src_tensor{};
+ CLTensor dst_tensor{};
+ CLTensor ker_tensor{};
+ CLTensor bias_tensor{};
+
+ src_tensor.allocator()->init(p.src_info<NCHW>());
+ dst_tensor.allocator()->init(p.dst_info<NCHW>());
+ ker_tensor.allocator()->init(p.ker_info<NCHW>());
+ bias_tensor.allocator()->init(p.bias_info());
+
+ check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+ p.pad_stride_info()));
+ conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
+
+ src_tensor.allocator()->allocate();
+ ker_tensor.allocator()->allocate();
+ bias_tensor.allocator()->allocate();
+ dst_tensor.allocator()->allocate();
+
+ // Run
+ meter.measure([&](int) {
+ conv.run();
+ CLScheduler::get().sync();
+ });
+})
+
+NONIUS_BENCHMARK("CLWinogradConvolutionLayer(NHWC)", [](nonius::chronometer meter) {
+ CLWinogradConvolutionLayer conv;
+
+ // Configure
+ Configuration p{meter};
+
+ CLTensor src_tensor{};
+ CLTensor dst_tensor{};
+ CLTensor ker_tensor{};
+ CLTensor bias_tensor{};
+
+ src_tensor.allocator()->init(p.src_info<NHWC>());
+ dst_tensor.allocator()->init(p.dst_info<NHWC>());
+ ker_tensor.allocator()->init(p.ker_info<NHWC>());
+ bias_tensor.allocator()->init(p.bias_info());
+
+ check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
+ p.pad_stride_info()));
+ conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
+
+ src_tensor.allocator()->allocate();
+ ker_tensor.allocator()->allocate();
+ bias_tensor.allocator()->allocate();
+ dst_tensor.allocator()->allocate();
+
+ // Run
+ meter.measure([&](int) {
+ conv.run();
+ CLScheduler::get().sync();
+ });
+})
+#endif // CL_WINOGRAD_CONVOLUTION
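The Configuration struct above recovers SAME-style padding from the requested output size. The rule, isolated as a standalone sketch:

// Standalone sketch of the padding rule used by Configuration
#include <cassert>
#include <cstdint>

struct Padding
{
  uint32_t top, bottom;
};

Padding same_padding(uint32_t ifm, uint32_t ker, uint32_t stride, uint32_t ofm)
{
  assert((ifm - ker) % stride == 0);
  const uint32_t effective_ofm = (ifm - ker) / stride + 1;
  assert(ofm >= effective_ofm);
  const uint32_t pad = ofm - effective_ofm;
  // An odd total puts the extra row on top, matching Configuration above.
  return Padding{pad / 2 + (pad % 2), pad / 2};
}

// e.g. same_padding(244, 3, 1, 244) gives {1, 1}: effective ofm is (244 - 3) / 1 + 1 = 242, pad is 2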
diff --git a/runtime/contrib/uben/Softmax.cpp b/runtime/contrib/uben/Softmax.cpp
new file mode 100644
index 000000000..1c4a6b197
--- /dev/null
+++ b/runtime/contrib/uben/Softmax.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Softmax benchmark
+ */
+
+#define NONIUS_RUNNER
+#include <nonius/nonius_single.h++>
+
+#include <cker/operation/SoftMax.h>
+
+#include <vector>
+
+//
+// Parameters
+//
+NONIUS_PARAM(LEN, 1000);
+
+//
+// Implementations
+//
+NONIUS_BENCHMARK("cker::Softmax(float)", [](nonius::chronometer meter) {
+ auto len = meter.param<LEN>();
+
+ nnfw::cker::SoftmaxParams params;
+ nnfw::cker::Shape shape{1, len};
+
+ params.beta = 1.0;
+
+ std::vector<float> input;
+ std::vector<float> output;
+
+ input.resize(len);
+ output.resize(len);
+
+ meter.measure([&](int) {
+ // Run!
+ nnfw::cker::Softmax(params, shape, input.data(), shape, output.data());
+ });
+})
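For reference, the quantity the benchmark measures is the standard softmax with beta = 1.0. A naive reference as a sketch (whether cker applies the max-subtraction below is an internal detail; it is the usual numerically stable formulation):

// Naive softmax reference (sketch; cker's kernel is the optimized version)
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> softmax_ref(const std::vector<float> &x, float beta = 1.0f)
{
  const float max_v = *std::max_element(x.begin(), x.end());
  std::vector<float> y(x.size());
  float sum = 0.0f;
  for (std::size_t i = 0; i < x.size(); ++i)
  {
    y[i] = std::exp(beta * (x[i] - max_v)); // subtract max for numerical stability
    sum += y[i];
  }
  for (float &v : y)
    v /= sum;
  return y;
}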