author     Chunseok Lee <chunseok.lee@samsung.com>  2018-05-04 08:57:16 (GMT)
committer  Chunseok Lee <chunseok.lee@samsung.com>  2018-05-04 08:57:16 (GMT)
commit     07659ccd9fe7b1cf1547cc6cad78bcf489f0a361 (patch)
tree       cf3a123812b7f1ad8b50d7d0ace891e0c03c6110
parent     da6f7a3e8360a49fd073a6e0031a4da134d9d984 (diff)
download   nnfw-07659ccd9fe7b1cf1547cc6cad78bcf489f0a361.zip
           nnfw-07659ccd9fe7b1cf1547cc6cad78bcf489f0a361.tar.gz
           nnfw-07659ccd9fe7b1cf1547cc6cad78bcf489f0a361.tar.bz2
Imported Upstream version 0.1  (upstream/0.1, submit/tizen/20180504.091146)
-rw-r--r--  .clang-format  90
-rw-r--r--  .gitignore  72
-rw-r--r--  .gitmodules  8
-rw-r--r--  CMakeLists.txt  181
-rw-r--r--  LICENSE  613
-rw-r--r--  Makefile  233
-rw-r--r--  README.md  16
-rw-r--r--  cmake/config/config_aarch64-linux.cmake  33
-rw-r--r--  cmake/config/config_aarch64-tizen.cmake  56
-rw-r--r--  cmake/config/config_arm64-android.cmake  44
-rw-r--r--  cmake/config/config_armv7l-linux.cmake  33
-rw-r--r--  cmake/config/config_armv7l-tizen.cmake  61
-rw-r--r--  cmake/config/config_x86_64-linux.cmake  7
-rw-r--r--  cmake/option/option_aarch64-linux.cmake  16
-rw-r--r--  cmake/option/option_aarch64-tizen.cmake  15
-rw-r--r--  cmake/option/option_arm64-android.cmake  4
-rw-r--r--  cmake/option/option_armv7l-linux.cmake  21
-rw-r--r--  cmake/option/option_armv7l-tizen.cmake  20
-rw-r--r--  cmake/option/option_linux.cmake  9
-rw-r--r--  cmake/option/option_x86_64-linux.cmake  12
-rw-r--r--  docs/howto/HowToAddUnittest.md  27
-rw-r--r--  externals/CMakeLists.txt  42
-rw-r--r--  externals/acl.cmake  150
-rw-r--r--  externals/eigen3.cmake  12
-rw-r--r--  include/NeuralNetworks.h  1929
-rw-r--r--  include/kernel/acl/Concatenation.h  42
-rw-r--r--  include/kernel/acl/Conv2D.h  52
-rw-r--r--  include/kernel/acl/DepthwiseConv2D.h  52
-rw-r--r--  include/kernel/acl/FullyConnected.h  45
-rw-r--r--  include/kernel/acl/Pooling.h  66
-rw-r--r--  include/kernel/acl/Reshape.h  37
-rw-r--r--  include/kernel/acl/Softmax.h  42
-rw-r--r--  include/kernel/acl/nnfw_kernel_acl.h  30
-rw-r--r--  include/support/nnapi/feature/Reader.h  61
-rw-r--r--  include/support/nnapi/feature/Utils.h  40
-rw-r--r--  include/support/tflite/Diff.h  119
-rw-r--r--  include/support/tflite/FeatureView.h  69
-rw-r--r--  include/support/tflite/InputIndex.h  46
-rw-r--r--  include/support/tflite/OutputIndex.h  46
-rw-r--r--  include/support/tflite/TensorUtils.h  43
-rw-r--r--  include/support/tflite/TensorView.h  64
-rw-r--r--  include/support/tflite/interp/Builder.h  43
-rw-r--r--  include/support/tflite/interp/FlatBufferBuilder.h  53
-rw-r--r--  include/support/tflite/interp/FunctionBuilder.h  56
-rw-r--r--  libs/CMakeLists.txt  5
-rw-r--r--  libs/kernel/CMakeLists.txt  3
-rw-r--r--  libs/kernel/acl/CMakeLists.txt  94
-rw-r--r--  libs/kernel/acl/src/CLUniqueTensor.h  63
-rw-r--r--  libs/kernel/acl/src/DepthwiseConv2D.h  98
-rw-r--r--  libs/kernel/acl/src/DepthwiseConv2D.test.h  245
-rw-r--r--  libs/kernel/acl/src/FullyConnected.h  149
-rw-r--r--  libs/kernel/acl/src/FullyConnected.test.h  266
-rw-r--r--  libs/kernel/acl/src/IO_accessor.cpp  310
-rw-r--r--  libs/kernel/acl/src/IO_accessor.h  196
-rw-r--r--  libs/kernel/acl/src/Init_acl.cpp  32
-rw-r--r--  libs/kernel/acl/src/NEUniqueTensor.h  64
-rw-r--r--  libs/kernel/acl/src/Reshape.h  70
-rw-r--r--  libs/kernel/acl/src/Reshape.test.h  51
-rw-r--r--  libs/kernel/acl/src/cl/Concatenation.cpp  104
-rw-r--r--  libs/kernel/acl/src/cl/Concatenation.test.cpp  62
-rw-r--r--  libs/kernel/acl/src/cl/Conv2D.cpp  113
-rw-r--r--  libs/kernel/acl/src/cl/Conv2D.test.cpp  202
-rw-r--r--  libs/kernel/acl/src/cl/DepthwiseConv2D.cpp  60
-rw-r--r--  libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp  20
-rw-r--r--  libs/kernel/acl/src/cl/FullyConnected.cpp  53
-rw-r--r--  libs/kernel/acl/src/cl/FullyConnected.test.cpp  20
-rw-r--r--  libs/kernel/acl/src/cl/Pooling.cpp  130
-rw-r--r--  libs/kernel/acl/src/cl/Pooling.test.cpp  482
-rw-r--r--  libs/kernel/acl/src/cl/Reshape.cpp  43
-rw-r--r--  libs/kernel/acl/src/cl/Reshape.test.cpp  20
-rw-r--r--  libs/kernel/acl/src/cl/Softmax.cpp  78
-rw-r--r--  libs/kernel/acl/src/cl/Softmax.test.cpp  105
-rw-r--r--  libs/kernel/acl/src/gtest_env.cpp  37
-rw-r--r--  libs/kernel/acl/src/neon/Concatenation.cpp  105
-rw-r--r--  libs/kernel/acl/src/neon/Concatenation.test.cpp  62
-rw-r--r--  libs/kernel/acl/src/neon/Conv2D.cpp  111
-rw-r--r--  libs/kernel/acl/src/neon/Conv2D.test.cpp  202
-rw-r--r--  libs/kernel/acl/src/neon/DepthwiseConv2D.cpp  61
-rw-r--r--  libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp  20
-rw-r--r--  libs/kernel/acl/src/neon/FullyConnected.cpp  58
-rw-r--r--  libs/kernel/acl/src/neon/FullyConnected.test.cpp  21
-rw-r--r--  libs/kernel/acl/src/neon/Pooling.cpp  128
-rw-r--r--  libs/kernel/acl/src/neon/Pooling.test.cpp  436
-rw-r--r--  libs/kernel/acl/src/neon/Reshape.cpp  48
-rw-r--r--  libs/kernel/acl/src/neon/Reshape.test.cpp  20
-rw-r--r--  libs/kernel/acl/src/neon/Softmax.cpp  77
-rw-r--r--  libs/kernel/acl/src/neon/Softmax.test.cpp  105
-rw-r--r--  libs/kernel/acl/src/shape.cpp  89
-rw-r--r--  libs/kernel/acl/src/shape.h  93
-rw-r--r--  libs/kernel/acl/src/support.cpp  51
-rw-r--r--  libs/kernel/acl/src/support.h  93
-rw-r--r--  libs/kernel/acl/src/util.cpp  108
-rw-r--r--  libs/kernel/acl/src/util.h  193
-rw-r--r--  libs/support/CMakeLists.txt  2
-rw-r--r--  libs/support/nnapi/CMakeLists.txt  6
-rw-r--r--  libs/support/nnapi/src/feature/Utils.cpp  43
-rw-r--r--  libs/support/tflite/CMakeLists.txt  10
-rw-r--r--  libs/support/tflite/src/Diff.cpp  262
-rw-r--r--  libs/support/tflite/src/FeatureView.cpp  76
-rw-r--r--  libs/support/tflite/src/TensorView.cpp  69
-rw-r--r--  libs/support/tflite/src/TensorView.test.cpp  36
-rw-r--r--  libs/support/tflite/src/interp/FlatBufferBuilder.cpp  46
-rw-r--r--  libs/support/tflite/src/interp/FunctionBuilder.cpp  40
-rw-r--r--  libs/util/CMakeLists.txt  17
-rw-r--r--  libs/util/examples/tensor_index_iterator.cpp  38
-rw-r--r--  libs/util/include/util/benchmark.h  66
-rw-r--r--  libs/util/include/util/environment.h  63
-rw-r--r--  libs/util/include/util/feature/Index.h  60
-rw-r--r--  libs/util/include/util/feature/IndexIterator.h  69
-rw-r--r--  libs/util/include/util/feature/Object.h  79
-rw-r--r--  libs/util/include/util/feature/Reader.h  40
-rw-r--r--  libs/util/include/util/feature/Shape.h  47
-rw-r--r--  libs/util/include/util/feature/TextFormatter.h  84
-rw-r--r--  libs/util/include/util/fp32.h  71
-rw-r--r--  libs/util/include/util/kernel/IndexIterator.h  72
-rw-r--r--  libs/util/include/util/kernel/RandomObject.h  71
-rw-r--r--  libs/util/include/util/kernel/Reader.h  40
-rw-r--r--  libs/util/include/util/kernel/Shape.h  48
-rw-r--r--  libs/util/include/util/tensor/Index.h  62
-rw-r--r--  libs/util/include/util/tensor/IndexFormatter.h  52
-rw-r--r--  libs/util/include/util/tensor/IndexIterator.h  104
-rw-r--r--  libs/util/include/util/tensor/NonIncreasingStride.h  61
-rw-r--r--  libs/util/include/util/tensor/Object.h  77
-rw-r--r--  libs/util/include/util/tensor/Reader.h  40
-rw-r--r--  libs/util/include/util/tensor/Shape.h  63
-rw-r--r--  libs/util/include/util/tensor/Zipper.h  72
-rw-r--r--  libs/util/include/util/vector.h  41
-rw-r--r--  libs/util/include/util/vector/Object.h  63
-rw-r--r--  libs/util/include/util/vector/Reader.h  40
-rw-r--r--  libs/util/src/environment.cpp  79
-rw-r--r--  libs/util/src/tensor/IndexFormatter.cpp  49
-rw-r--r--  libs/util/src/tensor/NonIncreasingStride.cpp  46
-rw-r--r--  libs/util/src/tensor/Shape.cpp  46
-rw-r--r--  packaging/nnfw.manifest  7
-rw-r--r--  packaging/nnfw.spec  105
-rwxr-xr-x  run  22
-rw-r--r--  runtimes/CMakeLists.txt  5
-rw-r--r--  runtimes/nn/CMakeLists.txt  27
-rw-r--r--  runtimes/nn/README.md  54
-rw-r--r--  runtimes/nn/common/CMakeLists.txt  31
-rw-r--r--  runtimes/nn/common/CpuExecutor.cpp  1324
-rw-r--r--  runtimes/nn/common/Logging.cpp  51
-rw-r--r--  runtimes/nn/common/NNFWKernels.cpp  72
-rw-r--r--  runtimes/nn/common/NNFWKernels.h  41
-rw-r--r--  runtimes/nn/common/NNFWKernels.lst  80
-rw-r--r--  runtimes/nn/common/OperationsUtils.cpp  565
-rw-r--r--  runtimes/nn/common/Utils.cpp  397
-rw-r--r--  runtimes/nn/common/include/ActivationFunctor.h  70
-rw-r--r--  runtimes/nn/common/include/CpuExecutor.h  165
-rw-r--r--  runtimes/nn/common/include/HalInterfaces.h  82
-rw-r--r--  runtimes/nn/common/include/Logging.h  61
-rw-r--r--  runtimes/nn/common/include/Operations.h  203
-rw-r--r--  runtimes/nn/common/include/OperationsUtils.h  247
-rw-r--r--  runtimes/nn/common/include/Utils.h  128
-rw-r--r--  runtimes/nn/common/operations/Activation.cpp  211
-rw-r--r--  runtimes/nn/common/operations/Concatenation.cpp  64
-rw-r--r--  runtimes/nn/common/operations/Conv2D.cpp  154
-rw-r--r--  runtimes/nn/common/operations/DepthwiseConv2D.cpp  119
-rw-r--r--  runtimes/nn/common/operations/FullyConnected.cpp  87
-rw-r--r--  runtimes/nn/common/operations/Pooling.cpp  163
-rw-r--r--  runtimes/nn/common/operations/Reshape.cpp  103
-rw-r--r--  runtimes/nn/common/operations/SimpleMath.cpp  217
-rw-r--r--  runtimes/nn/common/operations/internal/common.h  80
-rw-r--r--  runtimes/nn/common/operations/internal/compatibility.h  57
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/cpu_check.h  28
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/depthwiseconv_float.h  792
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/depthwiseconv_uint8.h  1606
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/neon_tensor_utils.cc  217
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/neon_tensor_utils.h  119
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/optimized_ops.h  2717
-rw-r--r--  runtimes/nn/common/operations/internal/optimized/tensor_utils_impl.h  133
-rw-r--r--  runtimes/nn/common/operations/internal/tensor_utils.cc  29
-rw-r--r--  runtimes/nn/common/operations/internal/tensor_utils.h  123
-rw-r--r--  runtimes/nn/common/operations/internal/tensor_utils_test.cc  198
-rw-r--r--  runtimes/nn/common/operations/internal/types.h  112
-rw-r--r--  runtimes/nn/depend/CMakeLists.txt  21
-rw-r--r--  runtimes/nn/depend/external/CMakeLists.txt  13
-rw-r--r--  runtimes/nn/depend/external/eigen/CMakeLists.txt  10
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/CMakeLists.txt  19
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Cholesky  41
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/CholmodSupport  48
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Core  516
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Dense  7
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Eigen  2
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Eigenvalues  57
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Geometry  62
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Householder  30
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/IterativeLinearSolvers  48
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Jacobi  33
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/LU  46
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/MetisSupport  35
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/OrderingMethods  73
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/PaStiXSupport  48
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/PardisoSupport  35
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/QR  47
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/QtAlignedMalloc  40
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SPQRSupport  34
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SVD  47
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/Sparse  36
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SparseCholesky  45
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SparseCore  69
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SparseLU  46
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SparseQR  37
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/StdDeque  27
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/StdList  26
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/StdVector  27
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/SuperLUSupport  64
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/UmfPackSupport  40
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Cholesky/LDLT.h  669
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Cholesky/LLT.h  534
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h  99
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/CholmodSupport/CholmodSupport.h  639
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Array.h  331
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/ArrayBase.h  226
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/ArrayWrapper.h  209
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Assign.h  90
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/AssignEvaluator.h  935
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/Assign_MKL.h  176
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/BandMatrix.h  353
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Block.h  452
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/BooleanRedux.h  164
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CommaInitializer.h  160
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/ConditionEstimator.h  175
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CoreEvaluators.h  1671
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CoreIterators.h  127
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CwiseBinaryOp.h  184
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CwiseNullaryOp.h  866
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CwiseTernaryOp.h  197
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryOp.h  103
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/CwiseUnaryView.h  128
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/DenseBase.h  611
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/DenseCoeffsBase.h  681
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/DenseStorage.h  570
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Diagonal.h  257
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/DiagonalMatrix.h  343
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/DiagonalProduct.h  28
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Dot.h  315
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/EigenBase.h  159
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/ForceAlignedAccess.h  146
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Fuzzy.h  155
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/GeneralProduct.h  454
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/GenericPacketMath.h  593
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/GlobalFunctions.h  187
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/IO.h  225
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Inverse.h  118
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Map.h  164
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/MapBase.h  299
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/MathFunctions.h  1431
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/MathFunctionsImpl.h  78
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Matrix.h  461
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/MatrixBase.h  530
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/NestByValue.h  110
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/NoAlias.h  108
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/NumTraits.h  248
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/PermutationMatrix.h  633
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/PlainObjectBase.h  1031
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Product.h  186
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/ProductEvaluators.h  1105
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Random.h  182
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Redux.h  505
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Ref.h  281
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Replicate.h  142
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/ReturnByValue.h  117
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Reverse.h  211
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Select.h  162
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/SelfAdjointView.h  350
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h  51
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Solve.h  188
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/SolveTriangular.h  232
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/SolverBase.h  130
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/StableNorm.h  221
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Stride.h  111
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Swap.h  67
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Transpose.h  403
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Transpositions.h  407
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/TriangularMatrix.h  983
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/VectorBlock.h  96
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/VectorwiseOp.h  695
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/Visitor.h  273
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AVX/Complex.h  483
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h  439
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AVX/PacketMath.h  633
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h  51
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h  396
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h  1316
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AltiVec/Complex.h  461
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h  322
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h  1033
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/CUDA/Complex.h  103
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/CUDA/Half.h  635
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h  91
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h  333
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h  1123
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h  212
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/Default/Settings.h  49
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/NEON/Complex.h  486
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h  91
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/NEON/PacketMath.h  729
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/SSE/Complex.h  503
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h  562
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/SSE/PacketMath.h  879
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h  77
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/ZVector/Complex.h  394
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h  137
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h  945
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/functors/AssignmentFunctors.h  168
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/functors/BinaryFunctors.h  482
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/functors/NullaryFunctors.h  188
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/functors/StlFunctors.h  132
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/functors/TernaryFunctors.h  25
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/functors/UnaryFunctors.h  823
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h  2149
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h  492
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h  311
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h  141
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h  115
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralMatrixVector.h  619
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h  129
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/Parallelizer.h  163
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h  521
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h  275
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h  260
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h  111
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/SelfadjointProduct.h  133
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h  93
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h  441
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h  302
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularMatrixVector.h  336
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h  241
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h  335
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h  151
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/products/TriangularSolverVector.h  145
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/BlasUtil.h  398
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/Constants.h  547
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/DisableStupidWarnings.h  75
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/ForwardDeclarations.h  302
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/MKL_support.h  128
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/Macros.h  992
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/Memory.h  977
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/Meta.h  492
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/NonMPL2.h  3
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h  27
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/StaticAssert.h  216
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Core/util/XprHelper.h  821
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h  346
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/ComplexSchur.h  459
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h  91
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/EigenSolver.h  622
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h  419
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h  226
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h  374
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h  160
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/RealQZ.h  654
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/RealSchur.h  546
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h  77
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h  870
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h  90
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h  556
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/AlignedBox.h  392
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/AngleAxis.h  247
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/EulerAngles.h  114
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Homogeneous.h  497
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Hyperplane.h  282
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/OrthoMethods.h  234
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/ParametrizedLine.h  195
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Quaternion.h  809
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Rotation2D.h  199
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/RotationBase.h  206
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Scaling.h  170
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Transform.h  1542
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Translation.h  208
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/Umeyama.h  166
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h  161
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Householder/BlockHouseholder.h  103
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Householder/Householder.h  172
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Householder/HouseholderSequence.h  470
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h  226
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h  228
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h  245
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h  400
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h  462
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h  394
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h  216
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h  115
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/Jacobi/Jacobi.h  441
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/LU/Determinant.h  101
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/LU/FullPivLU.h  891
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/LU/InverseImpl.h  415
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/LU/PartialPivLU.h  611
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h  83
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/LU/arch/Inverse_SSE.h  338
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/MetisSupport/MetisSupport.h  137
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h  1843
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/OrderingMethods/Ordering.h  157
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h  678
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/PardisoSupport/PardisoSupport.h  543
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/QR/ColPivHouseholderQR.h  653
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h  97
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h  562
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/QR/FullPivHouseholderQR.h  676
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/QR/HouseholderQR.h  409
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h  68
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h  313
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SVD/BDCSVD.h  1231
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SVD/JacobiSVD.h  804
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h  90
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SVD/SVDBase.h  313
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SVD/UpperBidiagonalization.h  414
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h  689
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/AmbiVector.h  377
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/CompressedStorage.h  258
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h  345
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h  67
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseAssign.h  216
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseBlock.h  603
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseColEtree.h  206
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseCompressedBase.h  341
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h  726
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h  148
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseDenseProduct.h  320
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h  138
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseDot.h  98
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseFuzzy.h  29
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseMap.h  305
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseMatrix.h  1403
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseMatrixBase.h  405
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparsePermutation.h  178
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseProduct.h  169
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseRedux.h  49
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseRef.h  397
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h  656
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseSolverBase.h  124
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h  198
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseTranspose.h  92
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseTriangularView.h  189
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseUtil.h  178
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseVector.h  478
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/SparseView.h  253
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseCore/TriangularSolver.h  315
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU.h  775
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLUImpl.h  66
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Memory.h  226
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Structs.h  110
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h  301
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_Utils.h  80
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h  181
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h  179
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h  107
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h  280
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h  126
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h  130
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h  223
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h  258
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h  137
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h  136
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h  83
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SparseQR/SparseQR.h  739
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/StlSupport/StdDeque.h  126
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/StlSupport/StdList.h  106
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/StlSupport/StdVector.h  131
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/StlSupport/details.h  84
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h  1027
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h  506
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/misc/Image.h  82
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/misc/Kernel.h  79
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/misc/RealSvd2x2.h  55
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/misc/blas.h  440
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/misc/lapack.h  152
-rwxr-xr-x  runtimes/nn/depend/external/eigen/Eigen/src/misc/lapacke.h  16291
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/misc/lapacke_mangling.h  17
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h  332
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h  552
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/BlockMethods.h  1058
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h  115
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h  163
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h  152
-rw-r--r--  runtimes/nn/depend/external/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h  85
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/CMakeLists.txt  11
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/fixedpoint/fixedpoint.h  779
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/fixedpoint/fixedpoint_neon.h  175
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/fixedpoint/fixedpoint_sse.h  218
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/allocator.h  220
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/block_params.h  174
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/common.h  256
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/compute.h  104
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/dispatch_gemm_shape.h  189
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/kernel.h  234
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/kernel_default.h  109
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/kernel_neon.h  1619
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/kernel_reference.h  118
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/kernel_sse.h  517
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/multi_thread_gemm.h  701
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/output.h  435
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/output_neon.h  432
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/output_sse.h  354
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/pack.h  435
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/pack_neon.h  320
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/pack_sse.h  128
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/simd_wrappers.h  508
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/simd_wrappers_common_neon_sse.h  646
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/simd_wrappers_neon.h  150
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/simd_wrappers_sse.h  123
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/single_thread_gemm.h  158
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/internal/unpack.h  278
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/profiling/instrumentation.h  244
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/profiling/profiler.h  373
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/public/bit_depth.h  62
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/public/gemmlowp.h  87
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/public/map.h  140
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/public/output_stages.h  185
-rw-r--r--  runtimes/nn/depend/hal/CMakeLists.txt  10
-rw-r--r--  runtimes/nn/depend/hal/include/android/hardware/neuralnetworks/1.0/types.h  493
-rw-r--r--  runtimes/nn/depend/libcutils/CMakeLists.txt  22
-rw-r--r--  runtimes/nn/depend/libcutils/ashmem-host.c  97
-rw-r--r--  runtimes/nn/depend/libcutils/include/cutils/ashmem.h  34
-rw-r--r--  runtimes/nn/depend/libcutils/include/cutils/native_handle.h  102
-rw-r--r--  runtimes/nn/depend/libcutils/native_handle.c  95
-rw-r--r--  runtimes/nn/depend/libhidl/CMakeLists.txt  11
-rw-r--r--  runtimes/nn/depend/libhidl/base/CMakeLists.txt  22
-rw-r--r--  runtimes/nn/depend/libhidl/base/HidlSupport.cpp  283
-rw-r--r--  runtimes/nn/depend/libhidl/base/Status.cpp  166
-rw-r--r--  runtimes/nn/depend/libhidl/base/include/hidl/HidlInternal.h  193
-rw-r--r--  runtimes/nn/depend/libhidl/base/include/hidl/HidlSupport.h  989
-rw-r--r--  runtimes/nn/depend/libhidl/base/include/hidl/Status.h  273
-rw-r--r--  runtimes/nn/depend/libutils/CMakeLists.txt  22
-rw-r--r--  runtimes/nn/depend/libutils/RefBase.cpp  809
-rw-r--r--  runtimes/nn/depend/libutils/StrongPointer.cpp  29
-rw-r--r--  runtimes/nn/depend/libutils/include/utils/Compat.h  87
-rw-r--r--  runtimes/nn/depend/libutils/include/utils/Errors.h  88
-rw-r--r--  runtimes/nn/depend/libutils/include/utils/LightRefBase.h  72
-rw-r--r--  runtimes/nn/depend/libutils/include/utils/RefBase.h  690
-rw-r--r--  runtimes/nn/depend/libutils/include/utils/StrongPointer.h  245
-rw-r--r--  runtimes/nn/depend/libutils/include/utils/TypeHelpers.h  336
-rw-r--r--  runtimes/nn/runtime/CMakeLists.txt  29
-rw-r--r--  runtimes/nn/runtime/Callbacks.cpp  115
-rw-r--r--  runtimes/nn/runtime/Callbacks.h  249
-rw-r--r--  runtimes/nn/runtime/CompilationBuilder.cpp  68
-rw-r--r--  runtimes/nn/runtime/CompilationBuilder.h  55
-rw-r--r--  runtimes/nn/runtime/ExecutionBuilder.cpp  293
-rw-r--r--  runtimes/nn/runtime/ExecutionBuilder.h  147
-rw-r--r--  runtimes/nn/runtime/Memory.cpp  199
-rw-r--r--  runtimes/nn/runtime/Memory.h  122
-rw-r--r--  runtimes/nn/runtime/ModelBuilder.cpp  386
-rw-r--r--  runtimes/nn/runtime/ModelBuilder.h  129
-rw-r--r--  runtimes/nn/runtime/NeuralNetworks.cpp  489
-rw-r--r--  runtimes/tests/CMakeLists.txt  8
-rw-r--r--  runtimes/tests/bring_up_test/CMakeLists.txt  22
-rw-r--r--  runtimes/tests/bring_up_test/add_main.cpp  117
-rw-r--r--  runtimes/tests/bring_up_test/cplusplus_main.cpp  16
-rw-r--r--  runtimes/tests/bring_up_test/simple_model.cpp  469
-rw-r--r--  runtimes/tests/bring_up_test/simple_model.h  63
-rw-r--r--  runtimes/tests/bring_up_test/simple_model_main.cpp  35
-rw-r--r--  runtimes/tests/include/NeuralNetworksWrapper.h  344
-rw-r--r--  runtimes/tests/neural_networks_test/CMakeLists.txt  34
-rw-r--r--  runtimes/tests/neural_networks_test/TestGenerated.cpp  139
-rw-r--r--  runtimes/tests/neural_networks_test/TestMain.cpp  31
-rw-r--r--  runtimes/tests/neural_networks_test/TestTrivialModel.cpp  221
-rw-r--r--  runtimes/tests/neural_networks_test/TestValidation.cpp  489
-rw-r--r--  runtimes/tests/neural_networks_test/generated/all_generated_tests.cpp  1025
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/add.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/add_broadcast_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/add_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_float_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_float_4.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_float_5.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_quant8_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_quant8_4.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/avg_pool_quant8_5.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/concat_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/concat_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/concat_float_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/concat_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/concat_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/concat_quant8_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_1_h3_w2_SAME.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_1_h3_w2_VALID.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_3_h3_w2_SAME.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_3_h3_w2_VALID.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float_channels.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float_channels_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float_large_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_float_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_channels.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_channels_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_large_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_overflow.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_overflow_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/conv_quant8_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depth_to_space_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depth_to_space_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depth_to_space_float_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depth_to_space_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depth_to_space_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float_large_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float_large_2_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float_large_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_float_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_quant8_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_quant8_large_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/depthwise_conv2d_quant8_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/dequantize.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/embedding_lookup.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/floor.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_float.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_float_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_float_large_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_float_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_quant8_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_quant8_large_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/fully_connected_quant8_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/hashtable_lookup_float.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/hashtable_lookup_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/l2_normalization.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/l2_normalization_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/l2_normalization_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/l2_pool_float.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/l2_pool_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/l2_pool_float_large.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/local_response_norm_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/local_response_norm_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/local_response_norm_float_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/local_response_norm_float_4.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/logistic_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/logistic_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/logistic_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/logistic_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lsh_projection.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lsh_projection_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lsh_projection_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm2_state.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm2_state2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm3_state.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm3_state2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm3_state3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm_state.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/lstm_state2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_float_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_float_4.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_quant8_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/max_pool_quant8_4.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/mobilenet_224_gender_basic_fixed.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/mobilenet_quantized.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/mul.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/mul_broadcast_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/mul_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/mul_relu.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu1_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu1_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu1_quant8_1.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu1_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu6_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu6_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu6_quant8_1.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu6_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu_quant8_1.example.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/relu_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/reshape.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/reshape_quant8.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/reshape_quant8_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/reshape_weights_as_inputs.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/resize_bilinear.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/resize_bilinear_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/rnn.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/rnn_state.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/softmax_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/softmax_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/softmax_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/softmax_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/space_to_depth_float_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/space_to_depth_float_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/space_to_depth_float_3.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/space_to_depth_quant8_1.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/space_to_depth_quant8_2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/svdf.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/svdf2.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/svdf_state.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/examples/tanh.example.cpp  22
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/add.model.cpp  24
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/add_broadcast_quant8.model.cpp  25
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/add_quant8.model.cpp  25
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_float_1.model.cpp  29
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_float_2.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_float_3.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_float_4.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_float_5.model.cpp  30
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_quant8_1.model.cpp  29
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_quant8_2.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_quant8_3.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_quant8_4.model.cpp  29
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/avg_pool_quant8_5.model.cpp  30
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/concat_float_1.model.cpp  25
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/concat_float_2.model.cpp  26
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/concat_float_3.model.cpp  26
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/concat_quant8_1.model.cpp  25
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/concat_quant8_2.model.cpp  26
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/concat_quant8_3.model.cpp  26
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_1_h3_w2_SAME.model.cpp  41
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_1_h3_w2_VALID.model.cpp  41
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_3_h3_w2_SAME.model.cpp  40
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_3_h3_w2_VALID.model.cpp  41
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float.model.cpp  37
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float_2.model.cpp  37
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float_channels.model.cpp  37
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float_channels_weights_as_inputs.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float_large.model.cpp  37
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float_large_weights_as_inputs.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_float_weights_as_inputs.model.cpp  33
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8.model.cpp  38
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_2.model.cpp  41
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_channels.model.cpp  38
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_channels_weights_as_inputs.model.cpp  34
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_large.model.cpp  38
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_large_weights_as_inputs.model.cpp  34
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_overflow.model.cpp  38
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_overflow_weights_as_inputs.model.cpp  34
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/conv_quant8_weights_as_inputs.model.cpp  34
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depth_to_space_float_1.model.cpp  24
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depth_to_space_float_2.model.cpp  24
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depth_to_space_float_3.model.cpp  24
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depth_to_space_quant8_1.model.cpp  24
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depth_to_space_quant8_2.model.cpp  24
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv.model.cpp  43
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float.model.cpp  40
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float_2.model.cpp  41
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float_large.model.cpp  40
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float_large_2.model.cpp  40
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float_large_2_weights_as_inputs.model.cpp  37
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float_large_weights_as_inputs.model.cpp  36
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_float_weights_as_inputs.model.cpp  36
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_quant8.model.cpp  40
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_quant8_2.model.cpp  41
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_quant8_large.model.cpp  40
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_quant8_large_weights_as_inputs.model.cpp  36
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/depthwise_conv2d_quant8_weights_as_inputs.model.cpp  36
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/dequantize.model.cpp  20
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/embedding_lookup.model.cpp  21
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/floor.model.cpp  19
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_float.model.cpp  31
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_float_2.model.cpp  32
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_float_large.model.cpp  31
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_float_large_weights_as_inputs.model.cpp  27
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_float_weights_as_inputs.model.cpp  27
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_quant8.model.cpp  32
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_quant8_2.model.cpp  32
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_quant8_large.model.cpp  31
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_quant8_large_weights_as_inputs.model.cpp  27
-rw-r--r--  runtimes/tests/neural_networks_test/generated/models/fully_connected_quant8_weights_as_inputs.model.cpp  28
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/hashtable_lookup_float.model.cpp26
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/hashtable_lookup_quant8.model.cpp26
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/l2_normalization.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/l2_normalization_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/l2_normalization_large.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/l2_pool_float.model.cpp29
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/l2_pool_float_2.model.cpp30
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/l2_pool_float_large.model.cpp39
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/local_response_norm_float_1.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/local_response_norm_float_2.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/local_response_norm_float_3.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/local_response_norm_float_4.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/logistic_float_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/logistic_float_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/logistic_quant8_1.model.cpp20
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/logistic_quant8_2.model.cpp20
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lsh_projection.model.cpp30
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lsh_projection_2.model.cpp30
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lsh_projection_weights_as_inputs.model.cpp28
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm.model.cpp59
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm2.model.cpp59
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm2_state.model.cpp59
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm2_state2.model.cpp59
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm3.model.cpp60
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm3_state.model.cpp60
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm3_state2.model.cpp60
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm3_state3.model.cpp60
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm_state.model.cpp59
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/lstm_state2.model.cpp59
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_float_1.model.cpp29
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_float_2.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_float_3.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_float_4.model.cpp30
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_quant8_1.model.cpp29
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_quant8_2.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_quant8_3.model.cpp33
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/max_pool_quant8_4.model.cpp30
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/mobilenet_224_gender_basic_fixed.model.cpp672
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/mobilenet_quantized.model.cpp719
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/mul.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/mul_broadcast_quant8.model.cpp26
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/mul_quant8.model.cpp25
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/mul_relu.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu1_float_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu1_float_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu1_quant8_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu1_quant8_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu6_float_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu6_float_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu6_quant8_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu6_quant8_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu_float_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu_float_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu_quant8_1.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/relu_quant8_2.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/reshape.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/reshape_quant8.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/reshape_quant8_weights_as_inputs.model.cpp22
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/reshape_weights_as_inputs.model.cpp22
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/resize_bilinear.model.cpp27
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/resize_bilinear_2.model.cpp27
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/rnn.model.cpp32
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/rnn_state.model.cpp32
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/softmax_float_1.model.cpp23
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/softmax_float_2.model.cpp23
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/softmax_quant8_1.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/softmax_quant8_2.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/space_to_depth_float_1.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/space_to_depth_float_2.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/space_to_depth_float_3.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/space_to_depth_quant8_1.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/space_to_depth_quant8_2.model.cpp24
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/svdf.model.cpp36
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/svdf2.model.cpp36
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/svdf_state.model.cpp36
-rw-r--r--runtimes/tests/neural_networks_test/generated/models/tanh.model.cpp19
-rw-r--r--runtimes/tests/neural_networks_test/include/TestHarness.h209
-rw-r--r--scripts/command/build7
-rwxr-xr-xscripts/command/common.sh2
-rwxr-xr-xscripts/command/docker_build.sh10
-rwxr-xr-xscripts/command/docker_build_cross_arm_ubuntu.sh38
-rwxr-xr-xscripts/command/docker_build_cross_arm_ubuntu_without_aclbuild.sh62
-rwxr-xr-xscripts/command/docker_build_tizen_cross.sh48
-rwxr-xr-xscripts/command/docker_build_ubuntu_svace.sh87
-rwxr-xr-xscripts/command/docker_coverage_report.sh38
-rwxr-xr-xscripts/command/docker_cross_test_coverage_build.sh63
-rwxr-xr-xscripts/command/docker_gbs_build.sh36
-rwxr-xr-xscripts/command/docker_run.sh24
-rwxr-xr-xscripts/command/docker_run_test.sh50
-rwxr-xr-xscripts/command/format-checker.sh89
-rw-r--r--scripts/command/gbs.conf21
-rwxr-xr-xscripts/command/gen_coverage_report.sh56
-rw-r--r--scripts/command/imported_url.txt3
-rwxr-xr-xscripts/command/lcov-to-covertura-xml.sh414
-rwxr-xr-xscripts/command/tizen_xu4_test.sh135
-rw-r--r--scripts/docker/Dockerfile14
-rw-r--r--scripts/docker/Dockerfile_tizen8
-rwxr-xr-xtests/framework/run_test.sh228
-rw-r--r--tests/framework/tests/inceptionv3/inception_nonslim/config.sh9
-rw-r--r--tests/framework/tests/inceptionv3/inception_slim/config.sh9
-rw-r--r--tools/.FORMATCHECKED0
-rw-r--r--tools/CMakeLists.txt1
-rw-r--r--tools/cross/apt_proxy0
-rw-r--r--tools/cross/arm/sources.list.trusty11
-rw-r--r--tools/cross/arm/sources.list.xenial11
-rw-r--r--tools/cross/arm64/sources.list.trusty11
-rw-r--r--tools/cross/arm64/sources.list.xenial11
-rwxr-xr-xtools/cross/armel/tizen-build-rootfs.sh43
-rwxr-xr-xtools/cross/armel/tizen-fetch.sh169
-rw-r--r--tools/cross/armel/tizen.patch18
-rwxr-xr-xtools/cross/build_android_rootfs.sh62
-rwxr-xr-xtools/cross/build_rootfs.sh122
-rw-r--r--tools/nnapi_test/CMakeLists.txt5
-rw-r--r--tools/nnapi_test/src/nnapi_test.cc59
-rw-r--r--tools/test_driver/nnfw_kernel_env_list.txt7
-rwxr-xr-xtools/test_driver/print_to_json.sh152
-rwxr-xr-xtools/test_driver/test_driver.sh417
892 files changed, 171789 insertions, 0 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..7dcf11c
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,90 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignEscapedNewlinesLeft: true
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: true
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBraces: Allman
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 100
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+IncludeCategories:
+ - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+ Priority: 2
+ - Regex: '^(<|"(gtest|isl|json)/)'
+ Priority: 3
+ - Regex: '.*'
+ Priority: 1
+IndentCaseLabels: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+ReflowComments: true
+SortIncludes: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
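A quick usage sketch (not part of the imported sources): with this .clang-format at the
repository root, clang-format resolves the style automatically via -style=file. The target
path below is just an example taken from the file list above.

    # Reformat one source file in place using the project style (sketch).
    clang-format -i -style=file tools/nnapi_test/src/nnapi_test.cc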
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ddba013
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,72 @@
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
+# Working Path
+/Product
+/tools/cross/rootfs
+
+# Tools
+/.vscode
+/.svace-dir
+
+# Compiled python3 code cache
+**/__pycache__
+*.pyc
+
+# vim temp files
+.*.sw*
+
+# ctags files
+tags
+
+# gnu-global files
+GPATH
+GTAGS
+GRTAGS
+
+# acl build outputs
+/externals/acl/..dblite
+/externals/acl/build
+/externals/acl/src/acl-stamp
+/externals/acl/tmp
+
+# Test cache for model download
+/tests/framework/cache
+
+# external libs
+/externals/absl/
+/externals/eigen/
+/externals/farmhash/
+/externals/flatbuffers/
+/externals/gemmlowp/
+/externals/neon_2_sse/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..a82b4ea
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,8 @@
+[submodule "tensorflow"]
+ path = externals/tensorflow
+ url = git://git.tizen.org/platform/upstream/tensorflow
+ branch = tizen
+[submodule "acl"]
+ path = externals/acl
+ url = git://git.tizen.org/platform/upstream/armcl
+ branch = master
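As a sketch, the two submodules declared above can be fetched manually with the standard git
command below; the Makefile's configure step runs the same command for non-Tizen targets.

    # Clone and update the tensorflow and acl submodules (sketch).
    git submodule update --init --recursive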
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..faf88ef
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,181 @@
+cmake_minimum_required(VERSION 2.8.12)
+
+project(nnfw)
+
+if(CMAKE_VERSION VERSION_LESS 3.1.0)
+ set(CMAKE_CXX_FLAGS "-std=c++11")
+else(CMAKE_VERSION VERSION_LESS 3.1.0)
+ set(CMAKE_CXX_STANDARD 11)
+endif(CMAKE_VERSION VERSION_LESS 3.1.0)
+
+# set host platform to build
+if(NOT HOST_ARCH OR "${HOST_ARCH}" STREQUAL "")
+ set(HOST_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})
+endif()
+
+# set target platform to run
+if(NOT TARGET_ARCH OR "${TARGET_ARCH}" STREQUAL "")
+ set(TARGET_ARCH "${HOST_ARCH}")
+endif()
+
+if(NOT DEFINED TARGET_OS)
+ set(TARGET_OS "${HOST_OS}")
+endif()
+
+if("${HOST_ARCH}" STREQUAL "x86_64")
+ set(HOST_ARCH_BASE ${HOST_ARCH})
+elseif("${HOST_ARCH}" STREQUAL "armv7l")
+ set(HOST_ARCH_BASE "arm")
+elseif("${HOST_ARCH}" STREQUAL "arm64")
+ set(HOST_ARCH_BASE "arm64")
+elseif("${HOST_ARCH}" STREQUAL "aarch64")
+ set(HOST_ARCH_BASE "aarch64")
+else()
+ message(FATAL_ERROR "'${HOST_ARCH}' architecture is not supported")
+endif()
+
+if("${TARGET_ARCH}" STREQUAL "x86_64")
+ set(TARGET_ARCH_BASE ${TARGET_ARCH})
+elseif("${TARGET_ARCH}" STREQUAL "armv7l")
+ set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "arm64")
+ set(TARGET_ARCH_BASE "arm64")
+elseif("${TARGET_ARCH}" STREQUAL "aarch64")
+ set(TARGET_ARCH_BASE "aarch64")
+else()
+ message(FATAL_ERROR "'${TARGET_ARCH}' architecture is not supported")
+endif()
+
+# Determine native or cross build
+if("${HOST_ARCH}" STREQUAL "${TARGET_ARCH}")
+ set(BUILD_IS_NATIVE True)
+else()
+ set(BUILD_IS_NATIVE False)
+endif()
+
+# host & target platform name
+set(HOST_PLATFORM "${HOST_ARCH}-${HOST_OS}")
+set(TARGET_PLATFORM "${TARGET_ARCH}-${TARGET_OS}")
+
+# lib pthread as a variable (pthread must be disabled on android)
+set(LIB_PTHREAD pthread)
+
+# platform specific options
+include("cmake/option/option_${TARGET_PLATFORM}.cmake")
+
+# test-coverage build flag
+if("${COVERAGE_BUILD}" STREQUAL "1")
+ set(CMAKE_CXX_OUTPUT_EXTENSION_REPLACE ON)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
+ set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage")
+endif()
+
+# add common flags
+foreach(FLAG ${FLAGS_COMMON})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+foreach(FLAG ${FLAGS_CONLY})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+foreach(FLAG ${FLAGS_CXXONLY})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+#
+# Configuration flags
+#
+option(BUILD_ACL "Build ARM Compute Library" OFF)
+option(BUILD_ACL_STATIC_LIB "Build ARM Compute Static Library" OFF)
+option(BUILD_BENCHMARK_ACL "Build ARM Compute Library Benchmarks" OFF)
+option(BUILD_NN_RUNTIME "Build NN Runtime" ON)
+option(BUILD_LABS "Build lab projects" ON)
+option(BUILD_ANDROID_NN_RUNTIME_TEST "Build Android NN Runtime Test" ON)
+
+#
+# Common variables
+#
+set(NNFW_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/include)
+
+if(NOT "${TARGET_OS}" STREQUAL "tizen" AND NOT "${TARGET_OS}" STREQUAL "android")
+ set(NNFW_ACL_FOLDER ${CMAKE_SOURCE_DIR}/externals/acl)
+ set(NNFW_ACL_INCLUDES ${NNFW_ACL_FOLDER}
+ ${NNFW_ACL_FOLDER}/include)
+else()
+ set(NNFW_ACL_FOLDER "")
+ set(NNFW_ACL_INCLUDES "")
+endif()
+set(NNFW_ACL_LIBS arm_compute_graph arm_compute arm_compute_core)
+set(NNFW_NN_RUNTIME_ROOT ${CMAKE_SOURCE_DIR}/runtimes/nn)
+
+# NOTE '${CMAKE_INSTALL_PREFIX}/lib' should be added as a link directory, as
+# the CI server places pre-built ARM Compute libraries in this directory.
+link_directories(${CMAKE_INSTALL_PREFIX}/lib)
+
+#
+# GTest support
+#
+if("${TARGET_OS}" STREQUAL "tizen" AND NOT "${TARGET_OS}" STREQUAL "android")
+ enable_testing()
+ find_package(GTest REQUIRED)
+ include_directories(${GTEST_INCLUDE_DIR})
+else()
+ include(ExternalProject)
+ # Download and install GoogleTest
+ ExternalProject_Add(
+ googletest
+ URL https://github.com/google/googletest/archive/release-1.8.0.zip
+ PREFIX ${CMAKE_CURRENT_BINARY_DIR}/googletest
+ # Disable install step
+ INSTALL_COMMAND ""
+ LOG_DOWNLOAD 1
+ LOG_BUILD 1
+ LOG_CONFIGURE 1
+ CMAKE_ARGS
+ -DCMAKE_TOOLCHAIN_FILE=${PROJECT_SOURCE_DIR}/cmake/config/config_${TARGET_ARCH}-${TARGET_OS}.cmake
+ )
+ ExternalProject_Get_Property(googletest source_dir binary_dir)
+
+ # include and link path for all sub project
+ include_directories(${source_dir}/googletest/include/)
+ link_directories(${binary_dir}/googlemock/gtest/)
+endif()
+
+# gtest libs
+set(NNFW_GTEST_LIBS libgtest.a libgtest_main.a ${LIB_PTHREAD})
+
+# TODO For now Android build is being enabled incrementally so not all subdirectories are added yet.
+# However we are going to have the same subdirectories with other OS eventually.
+if("${TARGET_OS}" STREQUAL "android")
+
+ include_directories(externals/tensorflow)
+ include_directories(externals/flatbuffers/include)
+ include_directories(include)
+
+ add_subdirectory(libs)
+ add_subdirectory(externals)
+ add_subdirectory(tools/nnapi_test)
+
+ if(BUILD_NN_RUNTIME)
+ add_subdirectory(runtimes/nn)
+ endif(BUILD_NN_RUNTIME)
+ add_subdirectory(src/support/tflite)
+
+else("${TARGET_OS}" STREQUAL "android") # General case (non-android build)
+
+# TODO Fix indentation
+
+if (NOT ${TARGET_OS} STREQUAL "tizen")
+ add_subdirectory(externals)
+endif()
+add_subdirectory(libs)
+add_subdirectory(tools)
+add_subdirectory(runtimes)
+
+endif("${TARGET_OS}" STREQUAL "android")
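To illustrate how these options fit together, here is a hedged configure sketch for a native
x86_64 Linux build. The build directory mirrors the Product/<arch>-<os>.<type> layout that the
Makefile below uses; all concrete values are illustrative, not prescribed.

    # Minimal out-of-tree configure (sketch; values are illustrative).
    mkdir -p Product/x86_64-linux.debug/obj
    cmake -BProduct/x86_64-linux.debug/obj -H. \
          -DCMAKE_BUILD_TYPE=debug \
          -DTARGET_ARCH=x86_64 -DHOST_OS=linux -DTARGET_OS=linux \
          -DBUILD_NN_RUNTIME=ON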
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..eb1c3bb
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,613 @@
+This file provides the full text of the licenses used in this project:
+
+- Apache License 2.0
+- Mozilla Public License 2.0
+- 3-Clause BSD License
+
+...............................................................................
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+...............................................................................
+
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
+
+...............................................................................
+
+Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+* Neither the name of Intel Corporation nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..63b1729
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,233 @@
+TARGET_ARCH?=$(shell uname -p)
+BUILD_TYPE?=Debug
+CROSS_BUILD?=0
+HOST_OS?=linux
+TARGET_OS?=linux
+NPROCS:=1
+OBS_BUILD?=0
+COVERAGE_BUILD?=0
+BENCHMARK_ACL_BUILD?=0
+OPTIONS?=
+
+# convert TARGET_ARCH and BUILD_TYPE to lowercase
+TARGET_ARCH_LC=$(shell echo $(TARGET_ARCH) | tr A-Z a-z)
+BUILD_TYPE_LC=$(shell echo $(BUILD_TYPE) | tr A-Z a-z)
+# we need the base name 'arm' for all ARM architectures
+TARGET_ARCH_BASE=$(TARGET_ARCH_LC)
+ifneq (,$(findstring arm64,$(TARGET_ARCH_BASE)))
+ # arm64 as target-arch comes from Android
+ TARGET_ARCH_BASE=arm64
+ ifdef ROOTFS_DIR
+ ROOTFS_ARM64=$(ROOTFS_DIR)
+ export ROOTFS_ARM64
+ endif
+ # For now Android is the only option for arm64
+ TARGET_OS:=android
+else ifneq (,$(findstring arm,$(TARGET_ARCH_BASE)))
+ TARGET_ARCH_BASE=arm
+ ifdef ROOTFS_DIR
+ ROOTFS_ARM=$(ROOTFS_DIR)
+ export ROOTFS_ARM
+ endif
+else ifneq (,$(findstring aarch64,$(TARGET_ARCH_BASE)))
+	# aarch64 as target-arch comes from every platform except Android
+ TARGET_ARCH_BASE=aarch64
+ ifdef ROOTFS_DIR
+ ROOTFS_ARM64=$(ROOTFS_DIR)
+ export ROOTFS_ARM64
+ endif
+endif
+# TODO: we may want to set CROSS_BUILD=1 automatically when ROOTFS_DIR is given
+# select the toolchain file, only for cross builds
+ifeq ($(CROSS_BUILD),1)
+ TOOLCHAIN_FILE=cmake/config/config_$(TARGET_ARCH_LC)-$(TARGET_OS).cmake
+ OPTION_TOOLCHAIN=-DCMAKE_TOOLCHAIN_FILE=$(TOOLCHAIN_FILE)
+else
+ OPTION_TOOLCHAIN=
+endif
+
+ifeq ($(COVERAGE_BUILD),1)
+ OPTIONS+= -DCOVERAGE_BUILD=1
+else
+ OPTIONS+= -DCOVERAGE_BUILD=0
+endif
+
+ifeq ($(BENCHMARK_ACL_BUILD),1)
+ OPTIONS+= -DBUILD_BENCHMARK_ACL=1
+endif
+
+# Get number of processors (linux only for now)
+ifeq ($(HOST_OS),linux)
+ NPROCS:=$(shell grep -c ^processor /proc/cpuinfo)
+endif
+
+WORKHOME=Product
+WORKFOLDER=$(TARGET_ARCH_LC)-$(TARGET_OS).$(BUILD_TYPE_LC)
+WORKDIR=$(WORKHOME)/$(WORKFOLDER)
+
+BUILD_ROOT=$(WORKDIR)/obj
+INSTALL_ROOT=$(WORKDIR)/out
+
+BUILD_FOLDER=$(WORKFOLDER)/obj
+INSTALL_FOLDER=$(WORKFOLDER)/out
+BUILD_ALIAS=$(WORKHOME)/obj
+INSTALL_ALIAS=$(WORKHOME)/out
+
+INSTALL_PATH?=$(CURDIR)/$(WORKDIR)/out
+
+TIMESTAMP_CONFIGURE=$(WORKDIR)/CONFIGURE
+TIMESTAMP_BUILD=$(WORKDIR)/BUILD
+TIMESTAMP_INSTALL=$(WORKDIR)/INSTALL
+
+ACL_FOLDER=externals/acl
+ACL_COMMAND=scons -j${NPROCS} Werror=1 neon=1 opencl=1 os=linux examples=0 embed_kernels=1
+ifeq ($(TARGET_ARCH_LC),armv7l)
+ ACL_COMMAND+= arch=armv7a
+else ifeq ($(TARGET_ARCH_LC),aarch64)
+ ACL_COMMAND+= arch=arm64-v8a
+endif
+ifeq ($(BUILD_TYPE_LC),debug)
+ ACL_COMMAND+=debug=1 asserts=1 build_dir=debug
+ ACL_FOLDER_BUILD=$(ACL_FOLDER)/build/debug
+else
+ ACL_COMMAND+=build_dir=release
+ ACL_FOLDER_BUILD=$(ACL_FOLDER)/build/release
+endif
+
+all: build
+
+###
+### Command (public)
+###
+configure: configure_internal
+
+build: build_internal
+
+install: $(TIMESTAMP_INSTALL)
+
+clean:
+ rm -rf $(WORKDIR)
+
+acl: internal_acl_build internal_acl_install
+
+check: check_internal
+
+runtime: runtime_build_internal
+
+testbuild: test_build_internal
+
+tflite: tflite_build_internal
+
+###
+### Command (internal)
+###
+configure_internal:
+ifneq ($(TARGET_OS),tizen)
+ git submodule update --init --recursive
+endif
+
+ mkdir -p $(BUILD_ROOT)
+ cmake -B$(CURDIR)/$(BUILD_ROOT) -H$(CURDIR) \
+ -DCMAKE_INSTALL_PREFIX=$(INSTALL_PATH) \
+ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE_LC) -DTARGET_ARCH=$(TARGET_ARCH_LC) \
+ -DHOST_OS=$(HOST_OS) \
+ -DTARGET_OS=$(TARGET_OS) \
+ $(OPTION_TOOLCHAIN) \
+ $(OPTIONS)
+ touch $(TIMESTAMP_CONFIGURE)
+
+build_internal: $(BUILD_ROOT)
+ cd $(BUILD_ROOT) && make -j $(NPROCS) all
+ rm -rf $(BUILD_ALIAS)
+ ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
+ touch $(TIMESTAMP_BUILD)
+
+install_internal:
+ cd $(BUILD_ROOT) && make install
+ rm -rf $(INSTALL_ALIAS)
+ ln -s $(INSTALL_FOLDER) $(INSTALL_ALIAS)
+ touch $(TIMESTAMP_INSTALL)
+
+internal_acl_build:
+ cd $(ACL_FOLDER) && $(ACL_COMMAND)
+
+internal_acl_install:
+ @mkdir -vp $(INSTALL_PATH)/lib
+ @cp -v $(ACL_FOLDER_BUILD)/libarm_compute_core.so $(INSTALL_PATH)/lib/.
+ @cp -v $(ACL_FOLDER_BUILD)/libarm_compute_graph.so $(INSTALL_PATH)/lib/.
+ @cp -v $(ACL_FOLDER_BUILD)/libarm_compute.so $(INSTALL_PATH)/lib/.
+
+external_acl:
+ifneq ($(EXT_ACL_FOLDER),"")
+ @mkdir -vp $(INSTALL_PATH)/lib
+ @cp -v $(EXT_ACL_FOLDER)/libarm_compute_core.so $(INSTALL_PATH)/lib/.
+ @cp -v $(EXT_ACL_FOLDER)/libarm_compute_graph.so $(INSTALL_PATH)/lib/.
+ @cp -v $(EXT_ACL_FOLDER)/libarm_compute.so $(INSTALL_PATH)/lib/.
+endif
+
+ifeq ($(CROSS_BUILD),1)
+check_internal: test_internal_cross
+else
+check_internal: test_internal
+endif
+
+# TODO: Enable tests on cross build
+test_internal_cross:
+ @echo "Can't do tests on cross build"
+
+test_internal:
+ tools/test_driver/test_driver.sh .
+
+build_test_suite: install_internal
+ @echo "packaging test suite"
+ @rm -rf $(INSTALL_ROOT)/test-suite.tar.gz
+ @tar -zcf test-suite.tar.gz tests/ tools/test_driver $(INSTALL_ALIAS) --dereference
+ @mv test-suite.tar.gz $(INSTALL_ROOT)/.
+
+build_coverage_suite: install_internal
+ @echo "packaging test-coverage suite"
+ @rm -rf $(INSTALL_ROOT)/coverage-suite.tar.gz
+ @find Product -name "*.gcno" > include_lists.txt
+ @tar -zcf coverage-suite.tar.gz tests/ tools/test_driver $(INSTALL_ALIAS) --dereference -T include_lists.txt
+ @rm -rf include_lists.txt
+ @mv coverage-suite.tar.gz $(INSTALL_ROOT)/.
+
+runtime_build_internal: $(BUILD_ROOT)
+ cd $(BUILD_ROOT) && make -j $(NPROCS) runtime
+ rm -rf $(BUILD_ALIAS)
+ ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
+ touch $(TIMESTAMP_BUILD)
+
+test_build_internal: $(BUILD_ROOT)
+ # Build test
+ cd $(BUILD_ROOT) && make -j $(NPROCS) nnapi_test
+ # Build unittest
+ cd $(BUILD_ROOT) && make -j $(NPROCS) kernelacl_test runtime_run_android_nn_test
+ rm -rf $(BUILD_ALIAS)
+ ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
+ touch $(TIMESTAMP_BUILD)
+
+tflite_build_internal: $(BUILD_ROOT)
+ # Build test
+ cd $(BUILD_ROOT) && make -j $(NPROCS) tensorflow-lite
+ rm -rf $(BUILD_ALIAS)
+ ln -s $(BUILD_FOLDER) $(BUILD_ALIAS)
+ touch $(TIMESTAMP_BUILD)
+
+###
+### Timestamps
+###
+$(WORKDIR):
+ mkdir -p $@
+
+$(BUILD_ROOT): $(WORKDIR)
+ make configure_internal
+
+$(TIMESTAMP_CONFIGURE):
+ make configure_internal
+
+$(TIMESTAMP_BUILD): $(TIMESTAMP_CONFIGURE)
+ make build_internal
+
+$(TIMESTAMP_INSTALL): $(TIMESTAMP_BUILD)
+ make install_internal
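+
+# NOTE `make install` on a clean tree walks the stamp chain
+# $(TIMESTAMP_INSTALL) <- $(TIMESTAMP_BUILD) <- $(TIMESTAMP_CONFIGURE),
+# so configure, build, and install run in order from a single invocation.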
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3d2a1f4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,16 @@
+# nnfw
+
+A high-performance, on-device neural network inference framework
+
+## Goal
+This project _nnfw_ aims to provide a high-performance, on-device neural network (NN) inference
+framework that performs inference of a given NN model on processors such as CPU, GPU, or NPU of
+a target platform such as Tizen or Smart Machine Platform (SMP).
+
+## About this _experimental_ release
+_Experimental_ means that the Tizen M1 release of _nnfw_ has very limited capability: it can only
+run InceptionV3, and support from the developers is very limited. Backward compatibility with
+future releases, e.g., the one planned for October, may not be guaranteed.
+
+## How-to documents
+- [How to add unittest using gtest](docs/howto/HowToAddUnittest.md)
diff --git a/cmake/config/config_aarch64-linux.cmake b/cmake/config/config_aarch64-linux.cmake
new file mode 100644
index 0000000..b13a8b0
--- /dev/null
+++ b/cmake/config/config_aarch64-linux.cmake
@@ -0,0 +1,33 @@
+#
+# config for aarch64-linux
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-5)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-5)
+
+# where is the target environment
+set(ROOTFS_ARM64 $ENV{ROOTFS_ARM64})
+if(NOT EXISTS "${ROOTFS_ARM64}/lib/aarch64-linux-gnu")
+ set(ROOTFS_ARM64 "${CMAKE_SOURCE_DIR}/tools/cross/rootfs/arm64")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_ARM64})
+set(CMAKE_FIND_ROOT_PATH ${ROOTFS_ARM64})
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_ARM64}"
+ CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_ARM64}"
+ CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/cmake/config/config_aarch64-tizen.cmake b/cmake/config/config_aarch64-tizen.cmake
new file mode 100644
index 0000000..e76ad21
--- /dev/null
+++ b/cmake/config/config_aarch64-tizen.cmake
@@ -0,0 +1,56 @@
+#
+# config for aarch64-tizen
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc-5)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++-5)
+
+set(TIZEN_TOOLCHAIN "aarch64-tizen-linux-gnu/6.2.1")
+
+# where is the target environment
+set(ROOTFS_ARM64 $ENV{ROOTFS_ARM64})
+if(NOT EXISTS "${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}")
+ set(ROOTFS_ARM64 "${CMAKE_SOURCE_DIR}/tools/cross/rootfs/arm64")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_ARM64})
+set(CMAKE_FIND_ROOT_PATH ${ROOTFS_ARM64})
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_ARM64}"
+ CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_ARM64}"
+ CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+add_compile_options(--sysroot=${ROOTFS_ARM64})
+
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_ARM64}")
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_ARM64}")
+
+include_directories(SYSTEM ${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}/include/c++/)
+include_directories(SYSTEM ${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}/include/c++/aarch64-tizen-linux-gnu)
+add_compile_options(-Wno-deprecated-declarations) # compile-time option
+add_compile_options(-D__extern_always_inline=inline) # compile-time option
+
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -B${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_ARM64}/lib64")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_ARM64}/usr/lib64")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}")
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -B${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_ARM64}/lib64")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_ARM64}/usr/lib64")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_ARM64}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}")
diff --git a/cmake/config/config_arm64-android.cmake b/cmake/config/config_arm64-android.cmake
new file mode 100644
index 0000000..1c096b5
--- /dev/null
+++ b/cmake/config/config_arm64-android.cmake
@@ -0,0 +1,44 @@
+set(ANDROID_STANDALONE $ENV{ROOTFS_ARM64})
+set(CROSS_NDK_TOOLCHAIN ${ANDROID_STANDALONE}/bin)
+set(CROSS_ROOTFS ${ANDROID_STANDALONE}/sysroot)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_VERSION 1)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+
+## Specify the toolchain
+set(TOOLCHAIN "aarch64-linux-android")
+set(CMAKE_PREFIX_PATH ${CROSS_NDK_TOOLCHAIN})
+set(TOOLCHAIN_PREFIX ${TOOLCHAIN}-)
+
+find_program(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}clang)
+find_program(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}clang++)
+find_program(CMAKE_ASM_COMPILER ${TOOLCHAIN_PREFIX}clang)
+find_program(CMAKE_AR ${TOOLCHAIN_PREFIX}ar)
+find_program(CMAKE_LD ${TOOLCHAIN_PREFIX}ld)
+find_program(CMAKE_OBJCOPY ${TOOLCHAIN_PREFIX}objcopy)
+find_program(CMAKE_OBJDUMP ${TOOLCHAIN_PREFIX}objdump)
+
+add_compile_options(--sysroot=${CROSS_ROOTFS})
+add_compile_options(-fPIE)
+
+## Needed for Android or bionic specific conditionals
+#add_compile_options(-D__ANDROID__)
+#add_compile_options(-D__BIONIC__)
+
+## NOTE Not sure this is safe. This may cause side effects.
+## Without this, it cannot find `std::stol`, `std::stoi` and so on, with android toolchain
+add_compile_options(-D_GLIBCXX_USE_C99=1)
+
+set(CROSS_LINK_FLAGS "${CROSS_LINK_FLAGS} -B${CROSS_ROOTFS}/usr/lib/gcc/${TOOLCHAIN}")
+set(CROSS_LINK_FLAGS "${CROSS_LINK_FLAGS} -L${CROSS_ROOTFS}/lib/${TOOLCHAIN}")
+set(CROSS_LINK_FLAGS "${CROSS_LINK_FLAGS} --sysroot=${CROSS_ROOTFS}")
+set(CROSS_LINK_FLAGS "${CROSS_LINK_FLAGS} -fPIE -pie")
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CROSS_LINK_FLAGS}" CACHE STRING "" FORCE)
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CROSS_LINK_FLAGS}" CACHE STRING "" FORCE)
+set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${CROSS_LINK_FLAGS}" CACHE STRING "" FORCE)
+
+set(CMAKE_FIND_ROOT_PATH "${CROSS_ROOTFS}")
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
diff --git a/cmake/config/config_armv7l-linux.cmake b/cmake/config/config_armv7l-linux.cmake
new file mode 100644
index 0000000..01d3df9
--- /dev/null
+++ b/cmake/config/config_armv7l-linux.cmake
@@ -0,0 +1,33 @@
+#
+# config for armv7l-linux
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR armv7l)
+
+set(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+
+# where is the target environment
+set(ROOTFS_ARM $ENV{ROOTFS_ARM})
+if(NOT EXISTS "${ROOTFS_ARM}/lib/arm-linux-gnueabihf")
+ set(ROOTFS_ARM "${CMAKE_SOURCE_DIR}/tools/cross/rootfs/arm")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_ARM})
+set(CMAKE_FIND_ROOT_PATH ${ROOTFS_ARM})
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_ARM}"
+ CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_ARM}"
+ CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
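+
+# A typical cross-build passes this file as the CMake toolchain file, e.g.
+# (hypothetical rootfs path):
+#   ROOTFS_ARM=/opt/rootfs/arm cmake -DCMAKE_TOOLCHAIN_FILE=cmake/config/config_armv7l-linux.cmake <source-dir>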
diff --git a/cmake/config/config_armv7l-tizen.cmake b/cmake/config/config_armv7l-tizen.cmake
new file mode 100644
index 0000000..3d49492
--- /dev/null
+++ b/cmake/config/config_armv7l-tizen.cmake
@@ -0,0 +1,61 @@
+#
+# config for armv7l-tizen
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR armv7l)
+
+set(CMAKE_C_COMPILER arm-linux-gnueabi-gcc-5)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabi-g++-5)
+
+set(TIZEN_TOOLCHAIN "armv7l-tizen-linux-gnueabi/6.2.1")
+
+# where is the target environment
+set(ROOTFS_ARM $ENV{ROOTFS_ARM})
+if(NOT EXISTS "${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
+ set(ROOTFS_ARM "${CMAKE_SOURCE_DIR}/tools/cross/rootfs/armel")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_ARM})
+set(CMAKE_FIND_ROOT_PATH ${ROOTFS_ARM})
+set(CMAKE_SHARED_LINKER_FLAGS
+ "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_ARM}"
+ CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_ARM}"
+ CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+add_compile_options(-mthumb)
+add_compile_options(-mfpu=neon-vfpv4)
+add_compile_options(-mfloat-abi=softfp)
+add_compile_options(--sysroot=${ROOTFS_ARM})
+
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_ARM}")
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_ARM}")
+
+include_directories(SYSTEM ${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/)
+include_directories(SYSTEM ${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/armv7l-tizen-linux-gnueabi)
+add_compile_options(-Wno-deprecated-declarations) # compile-time option
+add_compile_options(-D__extern_always_inline=inline) # compile-time option
+
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -B${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_ARM}/lib")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_ARM}/usr/lib")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -B${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_ARM}/lib")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_ARM}/usr/lib")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L${ROOTFS_ARM}/usr/lib/gcc/${TIZEN_TOOLCHAIN}")
diff --git a/cmake/config/config_x86_64-linux.cmake b/cmake/config/config_x86_64-linux.cmake
new file mode 100644
index 0000000..3dee876
--- /dev/null
+++ b/cmake/config/config_x86_64-linux.cmake
@@ -0,0 +1,7 @@
+#
+# config for x86_64-linux
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR x86_64)
diff --git a/cmake/option/option_aarch64-linux.cmake b/cmake/option/option_aarch64-linux.cmake
new file mode 100644
index 0000000..fa5c920
--- /dev/null
+++ b/cmake/option/option_aarch64-linux.cmake
@@ -0,0 +1,16 @@
+#
+# aarch64 linux compile options
+#
+
+message(STATUS "Building for AARCH64 Linux")
+
+# include linux common
+include("cmake/option/option_linux.cmake")
+
+if(NOT EXISTS "${ROOTFS_ARM64}/lib/aarch64-linux-gnu")
+ message(FATAL_ERROR "Please prepare RootFS for ARM64")
+endif()
+
+# addition for aarch64-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/cmake/option/option_aarch64-tizen.cmake b/cmake/option/option_aarch64-tizen.cmake
new file mode 100644
index 0000000..5e37f35
--- /dev/null
+++ b/cmake/option/option_aarch64-tizen.cmake
@@ -0,0 +1,15 @@
+#
+# aarch64 tizen compile options
+#
+
+message(STATUS "Building for AARCH64 Tizen")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/option/option_linux.cmake")
+
+# TODO : support rootfs setting for tizen cross-build
+
+# addition for aarch64-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ )
diff --git a/cmake/option/option_arm64-android.cmake b/cmake/option/option_arm64-android.cmake
new file mode 100644
index 0000000..42e504a
--- /dev/null
+++ b/cmake/option/option_arm64-android.cmake
@@ -0,0 +1,4 @@
+include("cmake/option/option_linux.cmake")
+
+# On Android, pthread is contained in bionic(libc)
+set(LIB_PTHREAD "")
diff --git a/cmake/option/option_armv7l-linux.cmake b/cmake/option/option_armv7l-linux.cmake
new file mode 100644
index 0000000..d4505ce
--- /dev/null
+++ b/cmake/option/option_armv7l-linux.cmake
@@ -0,0 +1,21 @@
+#
+# armv7l linux compile options
+#
+
+message(STATUS "Building for ARMv7l Linux")
+
+# include linux common
+include("cmake/option/option_linux.cmake")
+
+if(NOT EXISTS "${ROOTFS_ARM}/lib/arm-linux-gnueabihf")
+ message(FATAL_ERROR "Please prepare RootFS for ARM")
+endif()
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mcpu=cortex-a7"
+ "-mfloat-abi=hard"
+ "-mfpu=neon-vfpv4"
+ "-funsafe-math-optimizations"
+ "-ftree-vectorize"
+ )
diff --git a/cmake/option/option_armv7l-tizen.cmake b/cmake/option/option_armv7l-tizen.cmake
new file mode 100644
index 0000000..6040eb6
--- /dev/null
+++ b/cmake/option/option_armv7l-tizen.cmake
@@ -0,0 +1,20 @@
+#
+# armv7l tizen compile options
+#
+
+message(STATUS "Building for ARMv7l(softfp) Tizen")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/option/option_linux.cmake")
+
+# TODO : support rootfs setting for tizen cross-build
+
+# addition for armv7l-tizen
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-mcpu=cortex-a8"
+ "-mfloat-abi=softfp"
+ "-mfpu=neon-vfpv4"
+ "-funsafe-math-optimizations"
+ "-ftree-vectorize"
+ )
diff --git a/cmake/option/option_linux.cmake b/cmake/option/option_linux.cmake
new file mode 100644
index 0000000..ea533ec
--- /dev/null
+++ b/cmake/option/option_linux.cmake
@@ -0,0 +1,9 @@
+#
+# linux common compile options
+#
+
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG")
diff --git a/cmake/option/option_x86_64-linux.cmake b/cmake/option/option_x86_64-linux.cmake
new file mode 100644
index 0000000..6f4d0eb
--- /dev/null
+++ b/cmake/option/option_x86_64-linux.cmake
@@ -0,0 +1,12 @@
+#
+# x86_64 linux compile options
+#
+message(STATUS "Building for x86-64 Linux")
+
+# include linux common
+include("cmake/option/option_linux.cmake")
+
+# SIMD for x86
+set(FLAGS_COMMON ${FLAGS_COMMON}
+ "-msse4"
+ )
diff --git a/docs/howto/HowToAddUnittest.md b/docs/howto/HowToAddUnittest.md
new file mode 100644
index 0000000..6612f15
--- /dev/null
+++ b/docs/howto/HowToAddUnittest.md
@@ -0,0 +1,27 @@
+# How to Add Unittest using gtest (googletest)
+
+### 1. Make your own test code
+```
+#include "gtest/gtest.h"
+
+TEST(TFLite_test_case, simple_test)
+{
+ EXPECT_EQ(1, 1);
+}
+```
+
+### 2. Add dependency on `googletest` to your test executable
+```
+add_executable($YOURTEST_TARGET yourtest1.cc yourtest2.cc)
+add_dependencies($YOURTEST_TARGET googletest)
+```
+
+### 3. Link test executable against libgtest.a and libgtest_main.a (+ pthread)
+```
+target_link_libraries($YOURTEST_TARGET libgtest.a libgtest_main.a pthread)
+```
+
+### 4. Install test executable into Product/out/unittest
+```
+install(TARGETS $YOURTEST_TARGET DESTINATION unittest)
+```
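+
+### 5. Example: putting it all together
+Assuming a hypothetical test target named `my_unittest`, the four steps above combine into:
+```
+add_executable(my_unittest yourtest1.cc yourtest2.cc)
+add_dependencies(my_unittest googletest)
+target_link_libraries(my_unittest libgtest.a libgtest_main.a pthread)
+install(TARGETS my_unittest DESTINATION unittest)
+```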
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
new file mode 100644
index 0000000..0d8579e
--- /dev/null
+++ b/externals/CMakeLists.txt
@@ -0,0 +1,42 @@
+set(TENSORFLOW_BASE ${CMAKE_CURRENT_SOURCE_DIR}/tensorflow)
+set(TENSORFLOW_LITE_BASE ${TENSORFLOW_BASE}/tensorflow/contrib/lite)
+set(EXTERNAL_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/tensorflow/contrib/lite/downloads")
+
+include(eigen3.cmake)
+
+if(BUILD_IS_NATIVE AND NOT HOST_ARCH_BASE STREQUAL "arm")
+ add_subdirectory(${EXTERNAL_SRC_DIR}/flatbuffers)
+endif()
+
+#
+# Tensorflow Lite library
+#
+file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c" "${TENSORFLOW_LITE_BASE}/*.cc")
+file(GLOB TFLITE_CORE_TESTS "${TENSORFLOW_LITE_BASE}/*test*.cc")
+list(REMOVE_ITEM TFLITE_CORE_SRCS ${TFLITE_CORE_TESTS})
+
+file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
+file(GLOB_RECURSE TFLITE_KERNEL_TESTS "${TENSORFLOW_LITE_BASE}/kernels/*test*.cc")
+list(REMOVE_ITEM TFLITE_KERNEL_SRCS ${TFLITE_KERNEL_TESTS})
+
+list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
+list(APPEND TFLITE_SRCS "${EXTERNAL_SRC_DIR}/farmhash/src/farmhash.cc")
+
+list(APPEND TFLITE_INCLUDES "${EXTERNAL_SRC_DIR}/")
+list(APPEND TFLITE_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow")
+list(APPEND TFLITE_INCLUDES "${EXTERNAL_SRC_DIR}/gemmlowp")
+list(APPEND TFLITE_INCLUDES "${EXTERNAL_SRC_DIR}/neon_2_sse")
+list(APPEND TFLITE_INCLUDES "${EXTERNAL_SRC_DIR}/farmhash/src")
+list(APPEND TFLITE_INCLUDES "${EXTERNAL_SRC_DIR}/flatbuffers/include")
+
+add_library(tensorflow-lite ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite PUBLIC ${TFLITE_INCLUDES})
+target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
+target_link_libraries(tensorflow-lite eigen3 ${LIB_PTHREAD} dl)
+
+install(TARGETS tensorflow-lite ARCHIVE DESTINATION lib)
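+
+# A consumer target elsewhere in the tree can then link it directly, e.g.
+# (hypothetical target name):
+#   target_link_libraries(my_app tensorflow-lite)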
+
+if(BUILD_ACL)
+ include(acl.cmake)
+endif(BUILD_ACL)
diff --git a/externals/acl.cmake b/externals/acl.cmake
new file mode 100644
index 0000000..206ada6
--- /dev/null
+++ b/externals/acl.cmake
@@ -0,0 +1,150 @@
+###
+### ARM Compute Library
+###
+set(ACL_BASE ${CMAKE_CURRENT_SOURCE_DIR}/acl)
+set(ACL_GENERATED ${CMAKE_CURRENT_BINARY_DIR}/acl_generated)
+set(ACL_VERSION_TAG "${ACL_GENERATED}/arm_compute_version.embed")
+
+# Create 'arm_compute_version.embed'
+add_custom_command(OUTPUT ${ACL_VERSION_TAG}
+ COMMAND mkdir -p "${ACL_GENERATED}"
+ COMMAND echo '"unknown"' > "${ACL_VERSION_TAG}")
+
+file(GLOB_RECURSE ACL_UTIL_SRCS "${ACL_BASE}/src/core/utils/*.cpp")
+
+### ARM Compute Library - Foundation library (such as I/O and logging)
+if(BUILD_ACL_STATIC_LIB)
+ add_library(acl_foundation ${ACL_UTIL_SRCS})
+ target_include_directories(acl_foundation PUBLIC "${ACL_BASE}")
+ target_include_directories(acl_foundation PUBLIC "${ACL_BASE}/include")
+ target_link_libraries(acl_foundation dl pthread)
+endif(BUILD_ACL_STATIC_LIB)
+
+###
+### ARM Compute Library Common (Core & Runtime)
+###
+file(GLOB ACL_CORE_COMMON_SRCS "${ACL_BASE}/src/core/*.cpp")
+list(APPEND ACL_CORE_COMMON_SRCS ${ACL_VERSION_TAG})
+# Both CL & NEON runtime funtions use these CPP kernels
+list(APPEND ACL_CORE_COMMON_SRCS "${ACL_BASE}/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp")
+list(APPEND ACL_CORE_COMMON_SRCS "${ACL_BASE}/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp")
+list(APPEND ACL_CORE_COMMON_SRCS "${ACL_BASE}/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp")
+
+if(BUILD_ACL_STATIC_LIB)
+ add_library(acl_core_common ${ACL_CORE_COMMON_SRCS})
+ target_include_directories(acl_core_common PUBLIC "${ACL_GENERATED}")
+ target_link_libraries(acl_core_common acl_foundation)
+endif(BUILD_ACL_STATIC_LIB)
+
+file(GLOB ACL_RUNTIME_COMMON_SRCS "${ACL_BASE}/src/runtime/*.cpp")
+# src/runtime/Scheduler.cpp depends on this scheduler
+list(APPEND ACL_RUNTIME_COMMON_SRCS "${ACL_BASE}/src/runtime/CPP/SingleThreadScheduler.cpp")
+
+###
+### ARM Compute Library Open CL (Core & Runtime & Example)
+###
+file(GLOB ACL_CORE_OPENCL_SRCS "${ACL_BASE}/src/core/CL/*.cpp")
+file(GLOB ACL_CORE_OPENCL_KERNEL_SRCS "${ACL_BASE}/src/core/CL/kernels/*.cpp")
+list(APPEND ACL_CORE_OPENCL_SRCS ${ACL_CORE_OPENCL_KERNEL_SRCS})
+
+file(GLOB_RECURSE ACL_RUNTIME_OPENCL_SRCS "${ACL_BASE}/src/runtime/CL/*.cpp")
+
+# NOTE The static-library targets must come after the source lists they
+# consume; CMake expands these variables at configure time, so declaring
+# the targets first would leave them with empty source lists.
+if(BUILD_ACL_STATIC_LIB)
+ add_library(acl_core_opencl ${ACL_CORE_OPENCL_SRCS})
+ target_link_libraries(acl_core_opencl acl_core_common OpenCL)
+
+ add_library(acl_runtime_opencl ${ACL_RUNTIME_OPENCL_SRCS})
+ target_link_libraries(acl_runtime_opencl acl_runtime_common acl_core_opencl)
+endif(BUILD_ACL_STATIC_LIB)
+
+###
+### ARM Compute Library NEON (Core & Runtime & Example)
+###
+file(GLOB ACL_CORE_NEON_SRCS "${ACL_BASE}/src/core/NEON/kernels/*.cpp" "${ACL_BASE}/src/core/NEON/kernels/arm32/*.cpp")
+file(GLOB_RECURSE ACL_CORE_NEON_CONVOLUTION_SRCS "${ACL_BASE}/src/core/NEON/kernels/convolution/winograd/*.cpp" "${ACL_BASE}/src/core/NEON/kernels/convolution/depthwise/*.cpp")
+list(APPEND ACL_CORE_NEON_SRCS ${ACL_CORE_NEON_CONVOLUTION_SRCS})
+list(APPEND ACL_CORE_NEON_SRCS "${ACL_BASE}/src/core/CPP/ICPPSimpleKernel.cpp")
+list(APPEND ACL_CORE_NEON_SRCS "${ACL_BASE}/src/core/CPP/kernels/CPPPermuteKernel.cpp")
+
+file(GLOB_RECURSE ACL_RUNTIME_NEON_SRCS "${ACL_BASE}/src/runtime/NEON/*.cpp")
+# runtime/NEON/functions/NEWinogradLayer.h uses this implementation
+list(APPEND ACL_RUNTIME_NEON_SRCS "${ACL_BASE}/src/runtime/CPP/ICPPSimpleFunction.cpp")
+list(APPEND ACL_RUNTIME_NEON_SRCS "${ACL_BASE}/src/runtime/CPP/functions/CPPPermute.cpp")
+
+if(BUILD_ACL_STATIC_LIB)
+ add_library(acl_core_neon ${ACL_CORE_NEON_SRCS})
+ target_include_directories(acl_core_neon PUBLIC "${ACL_BASE}/arm_compute/core/NEON/kernels/assembly")
+ target_link_libraries(acl_core_neon acl_core_common)
+
+ add_library(acl_runtime_neon ${ACL_RUNTIME_NEON_SRCS})
+ target_link_libraries(acl_runtime_neon acl_runtime_common acl_core_neon)
+endif(BUILD_ACL_STATIC_LIB)
+
+# TODO Support Open MP core(?)
+# TODO Support Open GLES core(?)
+
+###
+### ARM Compute Library (Graph & Example)
+###
+file(GLOB ACL_GRAPH_COMMON_SRCS "${ACL_BASE}/src/graph/*.cpp" "${ACL_BASE}/src/graph/nodes/*.cpp")
+file(GLOB ACL_GRAPH_OPENCL_SRCS "${ACL_BASE}/src/graph/CL/*.cpp" "${ACL_BASE}/src/graph/operations/CL*.cpp")
+file(GLOB ACL_GRAPH_NEON_SRCS "${ACL_BASE}/src/graph/NE/*.cpp" "${ACL_BASE}/src/graph/operations/NE*.cpp")
+
+list(APPEND ACL_GRAPH_SRCS ${ACL_GRAPH_COMMON_SRCS})
+list(APPEND ACL_GRAPH_SRCS ${ACL_GRAPH_OPENCL_SRCS})
+list(APPEND ACL_GRAPH_SRCS ${ACL_GRAPH_NEON_SRCS})
+
+if(BUILD_ACL_STATIC_LIB)
+ add_library(acl_graph ${ACL_GRAPH_SRCS})
+ target_link_libraries(acl_graph acl_runtime_opencl acl_runtime_neon)
+endif(BUILD_ACL_STATIC_LIB)
+
+###
+### ARM Compute Shared Libraries
+###
+list(APPEND ACL_CORE_SRCS ${ACL_UTIL_SRCS})
+list(APPEND ACL_CORE_SRCS ${ACL_CORE_COMMON_SRCS})
+list(APPEND ACL_CORE_SRCS ${ACL_CORE_OPENCL_SRCS})
+list(APPEND ACL_CORE_SRCS ${ACL_CORE_NEON_SRCS})
+
+add_library(arm_compute_core SHARED ${ACL_CORE_SRCS})
+target_include_directories(arm_compute_core PUBLIC "${ACL_GENERATED}")
+target_include_directories(arm_compute_core PUBLIC "${ACL_BASE}")
+target_include_directories(arm_compute_core PUBLIC "${ACL_BASE}/include")
+target_include_directories(arm_compute_core PUBLIC "${ACL_BASE}/arm_compute/core/NEON/kernels/assembly")
+target_link_libraries(arm_compute_core dl pthread)
+
+list(APPEND ACL_RUNTIME_SRCS ${ACL_RUNTIME_COMMON_SRCS})
+list(APPEND ACL_RUNTIME_SRCS ${ACL_RUNTIME_OPENCL_SRCS})
+list(APPEND ACL_RUNTIME_SRCS ${ACL_RUNTIME_NEON_SRCS})
+
+add_library(arm_compute SHARED ${ACL_RUNTIME_SRCS})
+target_link_libraries(arm_compute arm_compute_core OpenCL)
+
+add_library(arm_compute_graph SHARED ${ACL_GRAPH_SRCS})
+target_link_libraries(arm_compute_graph arm_compute)
+
+add_library(arm_compute_test SHARED "${ACL_BASE}/utils/Utils.cpp")
+target_link_libraries(arm_compute_test arm_compute)
+
+add_library(arm_compute_graph_test SHARED "${ACL_BASE}/utils/GraphUtils.cpp")
+target_link_libraries(arm_compute_graph_test arm_compute_graph arm_compute_test)
+
+add_executable(cl_convolution "${ACL_BASE}/examples/cl_convolution.cpp")
+target_compile_definitions(cl_convolution PRIVATE ARM_COMPUTE_CL)
+target_link_libraries(cl_convolution arm_compute_test)
+
+add_executable(neon_convolution "${ACL_BASE}/examples/neon_convolution.cpp")
+target_link_libraries(neon_convolution arm_compute_test)
+
+add_executable(graph_lenet "${ACL_BASE}/examples/graph_lenet.cpp")
+target_link_libraries(graph_lenet arm_compute_graph_test)
diff --git a/externals/eigen3.cmake b/externals/eigen3.cmake
new file mode 100644
index 0000000..bc8bf59
--- /dev/null
+++ b/externals/eigen3.cmake
@@ -0,0 +1,12 @@
+#
+# Eigen 3
+#
+set(EIGEN_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/tensorflow/contrib/lite/downloads/eigen")
+file(GLOB_RECURSE EIGEN3_HDRS "${EIGEN_DIR}/*.h")
+
+# NOTE From 3.x, CMake supports INTERFACE library feature. Unfortunately,
+# CMake 2.8.12 (which Ubuntu 14.04 provides) does not support this
+# library feature.
+add_library(eigen3 STATIC ${EIGEN3_HDRS})
+set_target_properties(eigen3 PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(eigen3 PUBLIC "${EIGEN_DIR}")
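+
+# With CMake 3.x, the same thing could be expressed as a header-only
+# INTERFACE library (a sketch, not used here for 2.8.12 compatibility):
+#   add_library(eigen3 INTERFACE)
+#   target_include_directories(eigen3 INTERFACE "${EIGEN_DIR}")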
diff --git a/include/NeuralNetworks.h b/include/NeuralNetworks.h
new file mode 100644
index 0000000..beaf6be
--- /dev/null
+++ b/include/NeuralNetworks.h
@@ -0,0 +1,1929 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @addtogroup NeuralNetworks
+ * @{
+ */
+
+/**
+ * @file NeuralNetworks.h
+ */
+
+#ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+#define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+
+/******************************************************************
+ *
+ * IMPORTANT NOTICE:
+ *
+ * This file is part of Android's set of stable system headers
+ * exposed by the Android NDK (Native Development Kit).
+ *
+ * Third-party source AND binary code relies on the definitions
+ * here to be FROZEN ON ALL UPCOMING PLATFORM RELEASES.
+ *
+ * - DO NOT MODIFY ENUMS (EXCEPT IF YOU ADD NEW 32-BIT VALUES)
+ * - DO NOT MODIFY CONSTANTS OR FUNCTIONAL MACROS
+ * - DO NOT CHANGE THE SIGNATURE OF FUNCTIONS IN ANY WAY
+ * - DO NOT CHANGE THE LAYOUT OR SIZE OF STRUCTURES
+ */
+
+#if __ANDROID_API__ >= __ANDROID_API_O_MR1__
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+/**
+ * Operand types.
+ *
+ * The type of operands that can be added to a model.
+ *
+ * Although we define many types, most operators accept just a few
+ * types. The most commonly used are {@link ANEURALNETWORKS_TENSOR_FLOAT32},
+ * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+ * and {@link ANEURALNETWORKS_INT32}.
+ */
+typedef enum {
+ /** The following entries are used to declare scalars. */
+
+ /** A 32 bit floating point scalar value. */
+ ANEURALNETWORKS_FLOAT32 = 0,
+ /** A signed 32 bit integer scalar value. */
+ ANEURALNETWORKS_INT32 = 1,
+ /** An unsigned 32 bit integer scalar value. */
+ ANEURALNETWORKS_UINT32 = 2,
+
+ /** The following entries are used to declare tensors. */
+
+ /** A tensor of 32 bit floating point values. */
+ ANEURALNETWORKS_TENSOR_FLOAT32 = 3,
+ /** A tensor of 32 bit integer values. */
+ ANEURALNETWORKS_TENSOR_INT32 = 4,
+ /** A tensor of 8 bit integers that represent real numbers.
+ *
+ * Attached to this tensor are two numbers that can be used to convert
+ * the 8 bit integer to the real value and vice versa. These two numbers are:
+ * - scale: a 32 bit non-negative floating point value.
+ * - zeroPoint: a 32 bit integer, in range [0, 255].
+ *
+ * The formula is:
+ * real_value = (integer_value - zeroPoint) * scale.
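+ *
+ * For example, with scale = 0.5 and zeroPoint = 128, the stored value 130
+ * represents the real value (130 - 128) * 0.5 = 1.0.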
+ */
+ ANEURALNETWORKS_TENSOR_QUANT8_ASYMM = 5,
+} OperandCode;
+
+/**
+ * Operation types.
+ *
+ * The type of operations that can be added to a model.
+ */
+typedef enum {
+ /** Adds two tensors, element-wise.
+ *
+ * Takes two input tensors of identical type and compatible dimensions. The output
+ * is the sum of both input tensors, optionally modified by an activation function.
+ *
+ * Two dimensions are compatible when:
+ * 1. they are equal, or
+ * 2. one of them is 1
+ *
+ * The size of the output is the maximum size along each dimension of the input operands.
+ * It starts with the trailing dimensions, and works its way forward.
+ *
+ * Example:
+ *
+ * input1.dimension = {4, 1, 2}
+ * input2.dimension = {5, 4, 3, 1}
+ * output.dimension = {5, 4, 3, 2}
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor.
+ * * 1: A tensor of the same type, and compatible dimensions as input0.
+ * * 2: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The sum, a tensor of the same type as input0.
+ */
+ ANEURALNETWORKS_ADD = 0,
+
+ /** Performs a 2-D average pooling operation.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * sum_{i, j}(input[batch, row + i, col + j, channel]) / sum(1)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" (i.e., Num_samples, Height, Width, and Channels)
+ * data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 6: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the filter width.
+ * * 8: An INT32 value, specifying the filter height.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 2: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the filter width.
+ * * 5: An INT32 value, specifying the filter height.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
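+ *
+ * With explicit padding, the output spatial dimensions follow the usual
+ * pooling arithmetic (integer division):
+ *
+ * out_width = (width - filter_width + padding_left + padding_right) / stride_width + 1
+ * out_height = (height - filter_height + padding_top + padding_bottom) / stride_height + 1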
+ */
+ ANEURALNETWORKS_AVERAGE_POOL_2D = 1,
+
+ /** Concatenates the input tensors along the given dimension.
+ *
+ * The input tensors must have identical type and the same dimensions except the
+ * dimension along the concatenation axis.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0 ~ n-1: The list of n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm].
+ * For inputs of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, all
+ * input tensors must have the same scale and zeroPoint.
+ * * n: An INT32 value, specifying the concatenation axis.
+ *
+ * Outputs:
+ * * 0: The output, a tensor of the same type as the input tensors.
+ * The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm].
+ */
+ ANEURALNETWORKS_CONCATENATION = 2,
+
+ /** Performs a 2-D convolution operation.
+ *
+ * The CONV_2D op sweeps a 2-D filter that can mix channels together over a batch of
+ * images, applying the filter to each window of each image of the appropriate size.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * sum_{i, j} (
+ * input[batch, row + i, col + j, k] *
+ * filter[channel, row + i, col + j, k] +
+ * bias[channel]
+ * )
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 4: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 8: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 4: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out].
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
+ */
+ ANEURALNETWORKS_CONV_2D = 3,
+
+ /** Performs a depthwise 2-D convolution operation.
+ *
+ * Given an input tensor of shape [batches, height, width, depth_in] and a filter
+ * tensor of shape [1, filter_height, filter_width, depth_out] containing
+ * depth_out convolutional filters of depth 1, DEPTHWISE_CONV applies a different
+ * filter to each input channel (expanding from 1 channel to channel_multiplier channels
+ * for each), then concatenates the results together.
+ *
+ * The output has depth_out = depth_in * depth_multiplier channels.
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[b, i, j, k * channel_multiplier + q] =
+ * sum_{di, dj} (
+ * input[b, strides[1] * i + di, strides[2] * j + dj, k] *
+ * filter[1, di, dj, k * channel_multiplier + q]
+ * )
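+ *
+ * For example, with depth_in = 8 and a depthwise multiplier of 2, the filter
+ * has shape [1, filter_height, filter_width, 16] and depth_out = 16.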
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 4: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 8: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 9: An INT32 value, specifying the depthwise multiplier.
+ * * 10: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
+ * specifying the filter.
+ * * 2: A 1-D tensor, of shape [depth_out], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 4: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 6: An INT32 value, specifying the depthwise multiplier.
+ * * 7: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out].
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
+ */
+ ANEURALNETWORKS_DEPTHWISE_CONV_2D = 4,
+
+ /** Rearranges data from depth into blocks of spatial data.
+ *
+ * More specifically, this op outputs a copy of the input tensor where values from
+ * the depth dimension are moved in spatial blocks to the height and width dimensions.
+ * The value block_size indicates the input block size and how the data is moved.
+ *
+ * Chunks of data of size block_size * block_size from depth are rearranged into
+ * non-overlapping blocks of size block_size x block_size.
+ *
+ * The width of the output tensor is input_width * block_size, whereas the height is
+ * input_height * block_size.
+ * The depth of the input tensor must be divisible by block_size * block_size.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and
+ * block_size * block_size must be a divisor of the input depth.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batch, height*block_size, width*block_size,
+ * depth/(block_size*block_size)].
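+ *
+ * For example, with block_size = 2, an input of shape [1, 2, 2, 4] yields an
+ * output of shape [1, 4, 4, 1].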
+ */
+ ANEURALNETWORKS_DEPTH_TO_SPACE = 5,
+
+ /** Dequantizes the input tensor.
+ *
+ * The formula is:
+ *
+ * output = (input - zeroPoint) * scale.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0, but with type
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ */
+ ANEURALNETWORKS_DEQUANTIZE = 6,
+
+ /** Looks up sub-tensors in the input tensor.
+ *
+ * This operator takes for input a tensor of values (Values) and
+ * a one-dimensional tensor of selection indices (Lookups).
+ * The output tensor is the concatenation of sub-tensors of Values as
+ * selected by Lookups.
+ *
+ * Think of Values as being sliced along its first dimension:
+ * The entries in Lookups select which slices are concatenated together
+ * to create the output tensor.
+ *
+ * For example, if Values has shape of [40, 200, 300] and
+ * Lookups has shape of [3], we would expect all three values
+ * found in Lookups to be between 0 and 39. The resulting tensor will
+ * have shape of [3, 200, 300].
+ *
+ * If a value in Lookups is out of bounds, the operation will fail
+ * and an error will be reported.
+ *
+ * Inputs:
+ * * 0: Lookups. A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_INT32} type.
+ * The values are indices into the first dimension of Values.
+ * * 1: Values. An n-D tensor, where n >= 2, from which sub-tensors are
+ * extracted.
+ *
+ * Output:
+ * * 0: An n-D tensor with the same rank and shape as the Values
+ * tensor, except for the first dimension which has the same size
+ * as Lookups' only dimension.
+ */
+ ANEURALNETWORKS_EMBEDDING_LOOKUP = 7,
+
+ /** Computes element-wise floor() on the input tensor.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor.
+ *
+ * Outputs:
+ * * 0: The output tensor, of the same type and dimensions as the input tensor.
+ */
+ ANEURALNETWORKS_FLOOR = 8,
+
+ /** Denotes a fully (densely) connected layer, which connects all elements in the input
+ * tensor with each element in the output tensor.
+ *
+ * This layer implements the operation:
+ *
+ * outputs = activation(inputs * weights’ + bias)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input. If rank is greater than 2, then it gets flattened to
+ * a 2-D Tensor. The 2-D Tensor is handled as if dimensions corresponded to shape
+ * [batch_size, input_size], where “batch_size” corresponds to the batching dimension,
+ * and “input_size” is the size of the input.
+ * * 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where
+ * "num_units" corresponds to the number of output nodes.
+ * * 2: A 1-D tensor, of shape [num_units], specifying the bias.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should
+ * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}.
+ * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias
+ * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
+ * bias_scale == input_scale * filter_scale.
+ * * 3: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output tensor, of shape [batch_size, num_units].
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input_scale * filter_scale.
+ */
+ ANEURALNETWORKS_FULLY_CONNECTED = 9,
+
+ /** Looks up sub-tensors in the input tensor using a key-value map.
+ *
+ * This operator takes for input a tensor of values (Values),
+ * a one-dimensional tensor of selection values (Lookups) and
+ * a one-dimensional tensor that maps these values to Values
+ * indexes. The output tensor is the concatenation of sub-tensors of
+ * Values as selected by Lookups via Keys.
+ *
+ * Think of Values as being sliced along its outer-most dimension.
+ * The output is a concatenation of selected slices, with one slice
+ * for each entry of Lookups. The slice selected is the one at the
+ * same index as the Maps entry that matches the value in Lookups.
+ *
+ * For a hit, the corresponding sub-tensor of Values is included
+ * in the Output tensor. For a miss, the corresponding sub-tensor in
+ * Output will have zero values.
+ *
+ * For example, if Values has shape of [40, 200, 300],
+ * Keys should have a shape of [40]. If Lookups tensor has shape
+ * of [3], we're concatenating three slices, so the resulting tensor
+ * will have the shape of [3, 200, 300]. If the first entry in
+ * Lookups has the value 123456, we'll look for that value in Keys tensor.
+ * If the sixth entry of Keys contains 123456, we'll select the sixth
+ * slice of Values. If no entry in Keys has 123456, a slice of zeroes
+ * will be concatenated.
+ *
+ * Inputs:
+ * * 0: Lookups. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [ k ].
+ * * 1: Keys. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [ n ];
+ * Keys and Values pair represent a map, i.e., the ith element
+ * in Keys (Keys[i]) is the key to select the ith sub-tensor
+ * in Values (Values[i]), where 0 <= i <= n-1.
+ * Keys tensor *MUST* be sorted in ascending order.
+ * * 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension must be n.
+ *
+ * Outputs:
+ * * 0: Output. A tensor with shape [ k …].
+ * * 1: Hits. A boolean tensor with shape [ k ] indicates whether the lookup
+ * hits (True) or not (False).
+ * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0 and scale 1.0f.
+ * A non-zero byte represents True, a hit. A zero indicates otherwise.
+ */
+ ANEURALNETWORKS_HASHTABLE_LOOKUP = 10,
+
+ /** Applies L2 normalization along the depth dimension.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * input[batch, row, col, channel] /
+ * sqrt(sum_{c} pow(input[batch, row, col, c], 2))
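+ *
+ * For example, a depth slice [3.0, 4.0] normalizes to [0.6, 0.8], since
+ * sqrt(3^2 + 4^2) = 5.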
+ *
+ * For input tensors with more dimensions, each 1-D slice along dimension dim is normalized independently.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout (i.e., Num_samples, Height, Width, and Channels).
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth].
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_L2_NORMALIZATION = 11,
+
+ /** Performs a 2-D L2 pooling operation.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / sum(1))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 6: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the filter width.
+ * * 8: An INT32 value, specifying the filter height.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 2: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the filter width.
+ * * 5: An INT32 value, specifying the filter height.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each addition.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_L2_POOL_2D = 12,
+
+ /** Applies Local Response Normalization along the depth dimension.
+ *
+ * The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the last
+ * dimension), and each vector is normalized independently. Within a given vector,
+ * each component is divided by the weighted, squared sum of inputs within depth_radius.
+ *
+ * The output is calculated using this formula:
+ *
+ * sqr_sum[a, b, c, d] =
+ * sum(pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2)
+ * output = input / pow((bias + alpha * sqr_sum), beta)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the radius of the normalization window.
+ * * 2: A FLOAT32 value, specifying the bias, must not be zero.
+ * * 3: A FLOAT32 value, specifying the scale factor, alpha.
+ * * 4: A FLOAT32 value, specifying the exponent, beta.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION = 13,
+
+ /** Computes sigmoid activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = 1 / (1 + exp(-input))
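+ *
+ * For example, an input of 0.0 maps to 1 / (1 + exp(0)) = 0.5.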
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type,
+ * the scale must be 1.f / 256 and the zeroPoint must be 0.
+ */
+ ANEURALNETWORKS_LOGISTIC = 14,
+
+ /**
+ * Projects an input to a bit vector via locality-sensitive hashing.
+ *
+ * Inputs:
+ * * 0: Hash functions. Dim.size == 2, DataType: Float.
+ * Tensor[0].Dim[0]: Number of hash functions.
+ * Tensor[0].Dim[1]: Number of seeds per hash function.
+ * Tensor[0].Dim[1] <= 32 in sparse case.
+ *
+ * * 1: Input. Dim.size >= 1, no restriction on DataType.
+ * * 2: Weight. Optional. Dim.size == 1, DataType: Float.
+ * If not set, each input element is considered to have the same weight of
+ * 1.0.
+ * Tensor[1].Dim[0] == Tensor[2].Dim[0]
+ * * 3: Type:
+ * Sparse: Value LSHProjectionType_SPARSE(=1).
+ * Computed bit vector is considered to be sparse.
+ * Each output element is an int32 made up of multiple bits computed from
+ * hash functions.
+ *
+ * Dense: Value LSHProjectionType_DENSE(=2).
+ * Computed bit vector is considered to be dense. Each output element
+ * represents a bit and can take the value of either 0 or 1.
+ *
+ * Outputs:
+ * * 0: If the projection type is sparse:
+ * Output.Dim == { Tensor[0].Dim[0] }
+ * A tensor of int32 that represents hash signatures.
+ * If the projection type is Dense:
+ * Output.Dim == { Tensor[0].Dim[0] * Tensor[0].Dim[1] }
+ * A flattened tensor that represents projected bit vectors.
+ */
+ ANEURALNETWORKS_LSH_PROJECTION = 15,
+
+ /**
+ * Long short-term memory unit (LSTM) recurrent network layer.
+ *
+ * The default non-peephole implementation is based on:
+ * http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
+ * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
+ * Computation, 9(8):1735-1780, 1997.
+ *
+ * The peephole implementation is based on:
+ * https://research.google.com/pubs/archive/43905.pdf
+ * Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory
+ * recurrent neural network architectures for large scale acoustic modeling."
+ * INTERSPEECH, 2014.
+ *
+ * The coupling of input and forget gate (CIFG) is based on:
+ * http://arxiv.org/pdf/1503.04069.pdf
+ * Greff et al. "LSTM: A Search Space Odyssey"
+ *
+ * The class has the following independently optional inputs:
+ * * If CIFG (no input gate): “input_to_input_weights”,
+ * “recurrent_to_input_weights”, “cell_to_input_weights”, “input_gate_bias”.
+ * * If no peephole connections: “cell_to_input_weights”,
+ * “cell_to_forget_weights”, “cell_to_output_weights”.
+ * * If no projection layer: “projection_weights” and “projection_bias”.
+ * * If no projection bias: “projection_bias”.
+ *
+ * Supported tensor types (type T):
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Inputs:
+ * * 0: Input.
+ * A 2-D tensor of type T, of shape [batch_size, input_size], where
+ * “batch_size” corresponds to the batching dimension, and “input_size”
+ * is the size of the input.
+ * * 1: input_to_input_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size], where
+ * “num_units” corresponds to the number of cell units.
+ * * 2: input_to_forget_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size].
+ * * 3: input_to_cell_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size].
+ * * 4: input_to_output_weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size].
+ * * 5: recurrent_to_input_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size], where
+ * “output_size” corresponds to either the number of cell units (i.e.,
+ * “num_units”), or the second dimension of the “projection_weights”, if
+ * defined.
+ * * 6: recurrent_to_forget_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size].
+ * * 7: recurrent_to_cell_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size].
+ * * 8: recurrent_to_output_weights.
+ * A 2-D tensor of type T, of shape [num_units, output_size].
+ * * 9: cell_to_input_weights.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 10: cell_to_forget_weights.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 11: cell_to_output_weights.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 12: input_gate_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 13: forget_gate_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 14: cell_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 15: output_gate_bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 16: projection_weights.
+ * A 2-D tensor of type T, of shape [output_size, num_units].
+ * * 17: projection_bias.
+ * A 1-D tensor of type T, of shape [output_size].
+ * * 18: output_state (in).
+ * A 2-D tensor of type T, of shape [batch_size, output_size].
+ * * 19: cell_state (in).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ * * 20: fused_activation_function.
+ * An optional {@link FuseCode} value indicating the activation function.
+ * If “NONE” is specified then it results in a linear activation.
+ * * 21: cell_clip.
+ * A clipping threshold for the cell state, such that values are bound
+ * within [-cell_clip, cell_clip]. If set to 0.0 then clipping is
+ * disabled.
+ * * 22: proj_clip.
+ * A clipping threshold for the output from the projection layer, such
+ * that values are bound within [-proj_clip, proj_clip]. If set to 0.0
+ * then clipping is disabled.
+ *
+ * Outputs:
+ * * 0: scratch_buffer.
+ * A 3-D tensor of type T, of shape [batch_size, num_cell, 4].
+ * * 1: output_state (out).
+ * A 2-D tensor of type T, of shape [batch_size, output_size].
+ * * 2: cell_state (out).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ * * 3: output.
+ * A 2-D tensor of type T, of shape [batch_size, output_size]. This is
+ * effectively the same as the current “output_state” value.
+ */
+ ANEURALNETWORKS_LSTM = 16,
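+
+ /*
+ * Reading aid only (not part of the API): in the basic configuration
+ * (non-CIFG, no peepholes, no projection), the inputs above correspond to
+ * the standard recurrence from the papers cited, per time step t:
+ *
+ * i_t = sigmoid(W_xi x_t + W_hi h_(t-1) + b_i) // input gate
+ * f_t = sigmoid(W_xf x_t + W_hf h_(t-1) + b_f) // forget gate
+ * c_t = f_t * c_(t-1) + i_t * tanh(W_xc x_t + W_hc h_(t-1) + b_c)
+ * o_t = sigmoid(W_xo x_t + W_ho h_(t-1) + b_o) // output gate
+ * h_t = o_t * g(c_t)
+ *
+ * where W_x* are the input_to_* weights, W_h* the recurrent_to_* weights,
+ * b_* the gate biases, c the cell_state, h the output_state, and g the
+ * fused activation function. CIFG, peepholes, clipping, and projection
+ * modify these equations as described above; this sketch is not a
+ * normative specification.
+ */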
+
+ /** Performs a 2-D max pooling operation.
+ *
+ * The output dimensions are functions of the filter dimensions, stride, and padding.
+ *
+ * The values in the output tensor are computed as:
+ *
+ * output[batch, row, col, channel] =
+ * max_{i, j} (input[batch, row + i, col + j, channel])
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Both explicit padding and implicit padding are supported.
+ *
+ * Inputs (explicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension.
+ * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
+ * * 5: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 6: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 7: An INT32 value, specifying the filter width.
+ * * 8: An INT32 value, specifying the filter height.
+ * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result.
+ *
+ * Inputs (implicit padding):
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
+ * {@link PaddingCode} values.
+ * * 2: An INT32 value, specifying the stride when walking through input
+ * in the ‘width’ dimension.
+ * * 3: An INT32 value, specifying the stride when walking through input
+ * in the ‘height’ dimension.
+ * * 4: An INT32 value, specifying the filter width.
+ * * 5: An INT32 value, specifying the filter height.
+ * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth].
+ */
+ ANEURALNETWORKS_MAX_POOL_2D = 17,
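+
+ /*
+ * Illustration only (not part of the API): with explicit padding, the
+ * output spatial dimensions follow the usual pooling arithmetic:
+ *
+ * out_width = (width + padding_left + padding_right - filter_width)
+ * / stride_width + 1
+ * out_height = (height + padding_top + padding_bottom - filter_height)
+ * / stride_height + 1
+ *
+ * For example, a 32x32 input with a 2x2 filter, stride 2, and no padding
+ * yields a 16x16 output.
+ */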
+
+ /** Multiplies two tensors, element-wise.
+ *
+ * Takes two input tensors of identical type and compatible dimensions. The output
+ * is the product of both input tensors, optionally modified by an activation function.
+ *
+ * Two dimensions are compatible when:
+ * 1. they are equal, or
+ * 2. one of them is 1
+ *
+ * The size of the resulting output is the maximum size along each dimension of the
+ * input operands. It starts with the trailing dimensions, and works its way forward.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor.
+ * * 1: A tensor of the same type, and compatible dimensions as input0.
+ * * 2: An INT32 value, and has to be one of the {@link FuseCode} values.
+ * Specifies the activation to invoke on the result of each multiplication.
+ *
+ * Outputs:
+ * * 0: The product, a tensor of the same type as input0.
+ * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following
+ * condition must be satisfied: output_scale > input1_scale * input2_scale.
+ */
+ ANEURALNETWORKS_MUL = 18,
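+
+ /*
+ * Illustration only (not part of the API): an example of the broadcasting
+ * rules above. Dimensions are matched from the trailing end, and a missing
+ * leading dimension is treated as 1:
+ *
+ * input0 shape: [2, 1, 5]
+ * input1 shape: [3, 1]
+ * output shape: [2, 3, 5]
+ *
+ * Each output dimension is the maximum of the two (compatible) input
+ * dimensions at that position.
+ */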
+
+ /** Computes rectified linear activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = max(0, input)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_RELU = 19,
+
+ /** Computes rectified linear 1 activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = min(1.f, max(-1.f, input))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_RELU1 = 20,
+
+ /** Computes rectified linear 6 activation on the input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = min(6, max(0, input))
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_RELU6 = 21,
+
+ /** Reshapes a tensor.
+ *
+ * Given tensor, this operation returns a tensor that has the same values as tensor,
+ * but with a newly specified shape.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the tensor to be reshaped.
+ * * 1: A 1-D tensor of type {@link ANEURALNETWORKS_TENSOR_INT32}, defining the shape
+ * of the output tensor. The number of elements implied by shape must be the same
+ * as the number of elements in the input tensor.
+ *
+ * Outputs:
+ * * 0: The output tensor, of shape specified by the input shape.
+ */
+ ANEURALNETWORKS_RESHAPE = 22,
+
+ /** Resizes images to a given size using bilinear interpolation.
+ *
+ * Resized images will be distorted if their output aspect ratio is not the same as
+ * input aspect ratio.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
+ * * 1: An INT32 value, specifying the output height of the output tensor.
+ * * 2: An INT32 value, specifying the output width of the output tensor.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batches, new_height, new_width, depth].
+ */
+ ANEURALNETWORKS_RESIZE_BILINEAR = 23,
+
+ /**
+ * A basic recurrent neural network layer.
+ *
+ * This layer implements the operation:
+ * outputs = state = activation(inputs * input_weights + state * recurrent_weights + bias)
+ *
+ * Where:
+ * * “input_weights” is a weight matrix that multiplies the inputs;
+ * * “recurrent_weights” is a weight matrix that multiplies the current
+ * “state” which itself is the output from the previous time step
+ * computation;
+ * * “bias” is a bias vector (added to each output vector in the batch);
+ * * “activation” is the function passed as the “fused_activation_function”
+ * argument (if not “NONE”).
+ *
+ * Supported tensor types (Type T):
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Inputs:
+ * * 0: input.
+ * A 2-D tensor of type T, of shape [batch_size, input_size], where
+ * “batch_size” corresponds to the batching dimension, and “input_size” is
+ * the size of the input.
+ * * 1: weights.
+ * A 2-D tensor of type T, of shape [num_units, input_size], where
+ * “num_units” corresponds to the number of units.
+ * * 2: recurrent_weights.
+ * A 2-D tensor of type T, of shape [num_units, num_units], with columns
+ * corresponding to the weights from each unit.
+ * * 3: bias.
+ * A 1-D tensor of type T, of shape [num_units].
+ * * 4: hidden state (in).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ * * 5: fused_activation_function.
+ * An optional {@link FuseCode} value indicating the activation
+ * function. If “NONE” is specified then it results in a linear
+ * activation.
+ *
+ * Outputs:
+ * * 0: hidden state (out).
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ *
+ * * 1: output.
+ * A 2-D tensor of type T, of shape [batch_size, num_units]. This is
+ * effectively the same as the current state value.
+ */
+ ANEURALNETWORKS_RNN = 24,
+
+ /** Computes the softmax activation on the input tensor element-wise, per batch, by
+ * normalizing the input vector so the maximum coefficient is zero.
+ *
+ * The output is calculated using this formula:
+ *
+ * output[batch, i] =
+ * exp((input[batch, i] - max(input[batch, :])) * beta) /
+ * sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)}
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 2 or 4.
+ *
+ * Inputs:
+ * * 0: A 2-D or 4-D tensor, specifying the input.
+ * * 1: A FLOAT32 value, specifying the positive scaling factor for the exponent, beta.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type,
+ * the scale must be 1.f / 256 and the zeroPoint must be 0.
+ */
+ ANEURALNETWORKS_SOFTMAX = 25,
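+
+ /*
+ * Illustration only (not part of the API): a worked instance of the
+ * formula above with beta = 1 and input[batch, :] = {0, 1, 2}:
+ *
+ * shifted = {0 - 2, 1 - 2, 2 - 2} = {-2, -1, 0}
+ * exp = {0.1353, 0.3679, 1.0000}, sum = 1.5032
+ * output = {0.0900, 0.2447, 0.6652}
+ */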
+
+ /** Rearranges blocks of spatial data into depth.
+ *
+ * More specifically, this op outputs a copy of the input tensor where values from
+ * the height and width dimensions are moved to the depth dimension.
+ * The value block_size indicates the input block size and how the data is moved.
+ *
+ * Non-overlapping spatial blocks of size block_size x block_size are
+ * rearranged into chunks of size block_size * block_size along the depth
+ * dimension.
+ *
+ * The depth of the output tensor is input_depth * block_size * block_size.
+ * The input tensor's height and width must be divisible by block_size.
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+ *
+ * Supported tensor rank: 4, with "NHWC" data layout.
+ *
+ * Inputs:
+ * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input.
+ * * 1: An INT32 value, specifying the block_size. block_size must be >= 1
+ * and must divide both the input height and width.
+ *
+ * Outputs:
+ * * 0: The output 4-D tensor, of shape [batch, height/block_size, width/block_size,
+ * depth*block_size*block_size].
+ */
+ ANEURALNETWORKS_SPACE_TO_DEPTH = 26,
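+
+ /*
+ * Illustration only (not part of the API): with input shape [1, 4, 4, 1]
+ * and block_size = 2, each non-overlapping 2x2 spatial block becomes one
+ * output location of depth 4, so the output shape is [1, 2, 2, 4].
+ */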
+
+ /**
+ * SVDF op is a kind of stateful layer derived from the notion that a
+ * densely connected layer that's processing a sequence of input frames can
+ * be approximated by using a singular value decomposition of each of its
+ * nodes. The implementation is based on:
+ *
+ * https://research.google.com/pubs/archive/43813.pdf
+ *
+ * P. Nakkiran, R. Alvarez, R. Prabhavalkar, C. Parada.
+ * “Compressing Deep Neural Networks using a Rank-Constrained Topology”.
+ * INTERSPEECH, 2015.
+ *
+ * It processes the incoming input using a 2-stage filtering mechanism:
+ * * stage 1 performs filtering on the "features" dimension, whose outputs get
+ * pushed into a memory of fixed-size memory_size.
+ * * stage 2 performs filtering on the "time" dimension of the memory_size
+ * memoized outputs of stage 1.
+ *
+ * Specifically, for rank 1, this layer implements the operation:
+ *
+ * memory = push(conv1d(inputs, weights_feature, feature_dim,
+ * "ANEURALNETWORKS_PADDING_VALID"));
+ * outputs = activation(memory * weights_time + bias);
+ *
+ * Where:
+ * * “weights_feature” is a weights matrix that processes the inputs (by
+ * convolving the input with every “feature filter”), and whose outputs get
+ * pushed, stacked in order, into the fixed-size “memory” (the oldest entry
+ * gets dropped);
+ * * “weights_time” is a weights matrix that processes the “memory” (by a
+ * batched matrix multiplication on the num_units);
+ * * “bias” is an optional bias vector (added to each output vector in the
+ * batch); and
+ * * “activation” is the function passed as the “fused_activation_function”
+ * argument (if not “NONE”).
+ *
+ * Each rank adds a dimension to the weights matrices by means of stacking
+ * the filters.
+ *
+ * Supported tensor types (type T):
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Inputs:
+ * * 0: input.
+ * A 2-D tensor of type T, of shape [batch_size, input_size], where
+ * “batch_size” corresponds to the batching dimension, and “input_size” is
+ * the size of the input.
+ * * 1: weights_feature.
+ * A 2-D tensor of type T, of shape [num_units, input_size], where
+ * “num_units” corresponds to the number of units.
+ * * 2: weights_time.
+ * A 2-D tensor of type T, of shape [num_units, memory_size], where
+ * “memory_size” corresponds to the fixed-size of the memory.
+ * * 3: bias.
+ * An optional 1-D tensor of type T, of shape [num_units].
+ * * 4: state (in).
+ * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank].
+ * * 5: rank.
+ * The rank of the SVD approximation.
+ * * 6: fused_activation_function.
+ * An optional {@link FuseCode} value indicating the activation function.
+ * If “NONE” is specified then it results in a linear activation.
+ *
+ * Outputs:
+ * * 0: state (out).
+ * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank].
+ * * 1: output.
+ * A 2-D tensor of type T, of shape [batch_size, num_units].
+ */
+ ANEURALNETWORKS_SVDF = 27,
+
+ /** Computes hyperbolic tangent of input tensor element-wise.
+ *
+ * The output is calculated using this formula:
+ *
+ * output = tanh(input)
+ *
+ * Supported tensor types:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ *
+ * Supported tensor rank: up to 4.
+ *
+ * Inputs:
+ * * 0: A tensor, specifying the input.
+ *
+ * Outputs:
+ * * 0: The output tensor of same shape as input0.
+ */
+ ANEURALNETWORKS_TANH = 28,
+} OperationCode;
+
+/**
+ * Fused activation function types.
+ *
+ */
+typedef enum {
+ /** NO fused activation function. */
+ ANEURALNETWORKS_FUSED_NONE = 0,
+ /** Fused ReLU activation function. */
+ ANEURALNETWORKS_FUSED_RELU = 1,
+ /** Fused ReLU1 activation function. */
+ ANEURALNETWORKS_FUSED_RELU1 = 2,
+ /** Fused ReLU6 activation function. */
+ ANEURALNETWORKS_FUSED_RELU6 = 3,
+} FuseCode;
+
+/**
+ * Implicit padding algorithms.
+ *
+ */
+typedef enum {
+ /**
+ * SAME padding.
+ * Padding on both ends is the "same":
+ * padding_to_beginning = total_padding / 2
+ * padding_to_end = (total_padding + 1) / 2
+ * i.e., for an even amount of total padding, both ends receive exactly
+ * the same padding; for an odd amount, the end receives one more unit
+ * of padding than the beginning.
+ *
+ * total_padding is a function of input, stride and filter size.
+ * It could be computed as follows:
+ * out_size = (input + stride - 1) / stride
+ * needed_input = (out_size - 1) * stride + filter_size
+ * total_padding = max(0, needed_input - input)
+ * The computation is the same for the horizontal and vertical directions.
+ */
+ ANEURALNETWORKS_PADDING_SAME = 1,
+
+ /**
+ * VALID padding.
+ * No padding. When the input size is not evenly divisible by
+ * the filter size, the input at the end that could not fill
+ * the whole filter tile will simply be ignored.
+ */
+ ANEURALNETWORKS_PADDING_VALID = 2,
+} PaddingCode;
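+
+/*
+ * Illustration only (not part of the API): a minimal sketch of the SAME
+ * padding computation described above, usable when converting an implicit
+ * padding scheme into the explicit padding inputs of an operation. The
+ * helper name "compute_same_padding" is hypothetical.
+ *
+ *   static void compute_same_padding(int32_t input_size, int32_t stride,
+ *                                    int32_t filter_size,
+ *                                    int32_t* padding_begin,
+ *                                    int32_t* padding_end) {
+ *       int32_t out_size = (input_size + stride - 1) / stride;
+ *       int32_t needed_input = (out_size - 1) * stride + filter_size;
+ *       int32_t total_padding =
+ *           needed_input > input_size ? needed_input - input_size : 0;
+ *       *padding_begin = total_padding / 2;      // smaller half first
+ *       *padding_end = (total_padding + 1) / 2;  // odd unit goes at the end
+ *   }
+ */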
+
+/**
+ * Execution preferences.
+ */
+typedef enum {
+ /**
+ * Prefer executing in a way that minimizes battery drain.
+ * This is desirable for compilations that will be executed often.
+ */
+ ANEURALNETWORKS_PREFER_LOW_POWER = 0,
+ /**
+ * Prefer returning a single answer as fast as possible, even if this causes
+ * more power consumption.
+ */
+ ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER = 1,
+ /**
+ * Prefer maximizing the throughput of successive frames, for example when
+ * processing successive frames coming from the camera.
+ */
+ ANEURALNETWORKS_PREFER_SUSTAINED_SPEED = 2,
+} PreferenceCode;
+
+/**
+ * Result codes.
+ */
+typedef enum {
+ ANEURALNETWORKS_NO_ERROR = 0,
+ ANEURALNETWORKS_OUT_OF_MEMORY = 1,
+ ANEURALNETWORKS_INCOMPLETE = 2,
+ ANEURALNETWORKS_UNEXPECTED_NULL = 3,
+ ANEURALNETWORKS_BAD_DATA = 4,
+ ANEURALNETWORKS_OP_FAILED = 5,
+ ANEURALNETWORKS_UNMAPPABLE = 5,
+ ANEURALNETWORKS_BAD_STATE = 6,
+} ResultCode;
+
+/**
+ * For {@link ANeuralNetworksModel_setOperandValue}, values with a
+ * length smaller or equal to this will be immediately copied into
+ * the model. The size is in bytes.
+ */
+enum {
+ ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128
+};
+
+/**
+ * ANeuralNetworksMemory is an opaque type that represents memory.
+ *
+ * This type is used to represent shared memory, memory mapped files,
+ * and similar memories.
+ *
+ * By using shared memory, a program can efficiently communicate to the
+ * runtime and drivers the tensors that define a model. See
+ * {@link ANeuralNetworksModel_setOperandValueFromMemory}. An application
+ * should typically create one shared memory object that contains every tensor
+ * needed to define a model. {@link ANeuralNetworksMemory_createFromFd} can be
+ * used to create shared memory from a file handle. {@link ANeuralNetworksMemory_createShared}
+ * can be used to create shared memory directly.
+ *
+ * Memory objects can also be used to specify the input and output arguments of
+ * an execution. See {@link ANeuralNetworksExecution_setInputFromMemory}
+ * and {@link ANeuralNetworksExecution_setOutputFromMemory}.
+ */
+typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
+
+/**
+ * ANeuralNetworksModel is an opaque type that contains a description of the
+ * mathematical operations that constitute the model.
+ *
+ * <p>The model will be built by calling<ul>
+ * <li>{@link ANeuralNetworksModel_create},</li>
+ * <li>{@link ANeuralNetworksModel_addOperation},</li>
+ * <li>{@link ANeuralNetworksModel_addOperand},</li>
+ * </ul>
+ *
+ * A model is completed by calling {@link ANeuralNetworksModel_finish}.
+ * A model is destroyed by calling {@link ANeuralNetworksModel_free}.
+ *
+ * <p>A model cannot be modified once {@link ANeuralNetworksModel_finish}
+ * has been called on it.</p>
+ *
+ * <p>It is the application's responsibility to make sure that only one thread
+ * modifies a model at a given time. It is however safe for more than one
+ * thread to use the model once {@link ANeuralNetworksModel_finish} has returned.</p>
+ *
+ * <p>It is also the application's responsibility to ensure that there are no other
+ * uses of the model after calling {@link ANeuralNetworksModel_free}.
+ * This includes any compilation or execution object created using the model.</p>
+ */
+typedef struct ANeuralNetworksModel ANeuralNetworksModel;
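+
+/*
+ * Illustration only (not part of the API): a minimal sketch of the build
+ * flow described above, creating a one-operation model that applies
+ * {@link ANEURALNETWORKS_RELU} to a single tensor. The shape is an
+ * assumption of this example, and error handling is omitted; each call
+ * returns ANEURALNETWORKS_NO_ERROR on success.
+ *
+ *   ANeuralNetworksModel* model = NULL;
+ *   ANeuralNetworksModel_create(&model);
+ *
+ *   uint32_t dims[2] = {1, 8};
+ *   ANeuralNetworksOperandType tensorType = {
+ *       .type = ANEURALNETWORKS_TENSOR_FLOAT32,
+ *       .dimensionCount = 2,
+ *       .dimensions = dims,
+ *       .scale = 0.0f,
+ *       .zeroPoint = 0,
+ *   };
+ *   ANeuralNetworksModel_addOperand(model, &tensorType); // operand 0: input
+ *   ANeuralNetworksModel_addOperand(model, &tensorType); // operand 1: output
+ *
+ *   uint32_t inputs[1] = {0};
+ *   uint32_t outputs[1] = {1};
+ *   ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_RELU,
+ *                                     1, inputs, 1, outputs);
+ *   ANeuralNetworksModel_identifyInputsAndOutputs(model, 1, inputs,
+ *                                                 1, outputs);
+ *   ANeuralNetworksModel_finish(model);
+ *   // ... compile and execute ...
+ *   ANeuralNetworksModel_free(model);
+ */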
+
+/**
+ * ANeuralNetworksCompilation is an opaque type that can be used to compile
+ * a machine learning model.
+ *
+ * <p>To use:<ul>
+ * <li>Create a new compilation instance by calling the
+ * {@link ANeuralNetworksCompilation_create} function.</li>
+ * <li>Set any desired properties on the compilation (for example,
+ * {@link ANeuralNetworksCompilation_setPreference}).</li>
+ * <li>Complete the compilation with {@link ANeuralNetworksCompilation_finish}.</li>
+ * <li>Use the compilation as many times as needed
+ * with {@link ANeuralNetworksExecution_create}.</li>
+ * <li>Destroy the compilation with {@link ANeuralNetworksCompilation_free}
+ * once all executions using the compilation have completed.</li></ul></p>
+ *
+ * A compilation is completed by calling {@link ANeuralNetworksCompilation_finish}.
+ * A compilation is destroyed by calling {@link ANeuralNetworksCompilation_free}.
+ *
+ * <p>A compilation cannot be modified once {@link ANeuralNetworksCompilation_finish}
+ * has been called on it.</p>
+ *
+ * <p>It is the application's responsibility to make sure that only
+ * one thread modifies a compilation at a given time. It is however
+ * safe for more than one thread to use the compilation once
+ * {@link ANeuralNetworksCompilation_finish} has returned.</p>
+ *
+ * <p>It is also the application's responsibility to ensure that there are no other
+ * uses of the compilation after calling {@link ANeuralNetworksCompilation_free}.
+ * This includes any execution object created using the compilation.</p>
+ */
+typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation;
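+
+/*
+ * Illustration only (not part of the API): a minimal sketch of the
+ * compilation flow listed above, assuming "model" has already been
+ * finished with {@link ANeuralNetworksModel_finish}.
+ *
+ *   ANeuralNetworksCompilation* compilation = NULL;
+ *   ANeuralNetworksCompilation_create(model, &compilation);
+ *   ANeuralNetworksCompilation_setPreference(
+ *       compilation, ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
+ *   ANeuralNetworksCompilation_finish(compilation);
+ *   // ... create and run executions ...
+ *   ANeuralNetworksCompilation_free(compilation);
+ */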
+
+/**
+ * ANeuralNetworksExecution is an opaque type that can be used to apply a machine
+ * learning model to a set of inputs.
+ *
+ * <p>To use:<ul>
+ * <li>Create a new execution instance by calling the
+ * {@link ANeuralNetworksExecution_create} function.</li>
+ * <li>Associate data to the model inputs with
+ * {@link ANeuralNetworksExecution_setInput} or
+ * {@link ANeuralNetworksExecution_setInputFromMemory}.</li>
+ * <li>Associate output buffers to the model outputs with
+ * {@link ANeuralNetworksExecution_setOutput} or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li>
+ * <li>Apply the model with {@link ANeuralNetworksExecution_startCompute}.</li>
+ * <li>Wait for the execution to complete with {@link
+ * ANeuralNetworksEvent_wait}.</li>
+ * <li>Destroy the execution with
+ * {@link ANeuralNetworksExecution_free}.</li></ul></p>
+ *
+ * <p>An execution cannot be modified once {@link ANeuralNetworksExecution_startCompute}
+ * has been called on it.</p>
+ *
+ * <p>An execution can be applied to a model with
+ * {@link ANeuralNetworksExecution_startCompute} only once. Create new executions
+ * to do new evaluations of the model.</p>
+ *
+ * <p>It is the application's responsibility to make sure that only one thread
+ * modifies an execution at a given time. It is however safe for more than one
+ * thread to use {@link ANeuralNetworksEvent_wait} at the same time.</p>
+ *
+ * <p>It is also the application's responsibility to ensure that there are no other
+ * uses of the request after calling {@link ANeuralNetworksExecution_free}.</p>
+ */
+typedef struct ANeuralNetworksExecution ANeuralNetworksExecution;
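+
+/*
+ * Illustration only (not part of the API): a minimal sketch of the
+ * execution flow listed above, for a model with one FLOAT32 input and one
+ * FLOAT32 output of 8 elements each (buffer sizes are assumptions of this
+ * example; "compilation" is a finished ANeuralNetworksCompilation).
+ *
+ *   float in[8] = {0};  // fill with real input data
+ *   float out[8];
+ *
+ *   ANeuralNetworksExecution* execution = NULL;
+ *   ANeuralNetworksEvent* event = NULL;
+ *   ANeuralNetworksExecution_create(compilation, &execution);
+ *   ANeuralNetworksExecution_setInput(execution, 0, NULL, in, sizeof(in));
+ *   ANeuralNetworksExecution_setOutput(execution, 0, NULL, out, sizeof(out));
+ *   ANeuralNetworksExecution_startCompute(execution, &event);
+ *   ANeuralNetworksEvent_wait(event); // blocks until "out" is ready
+ *   ANeuralNetworksEvent_free(event);
+ *   ANeuralNetworksExecution_free(execution);
+ */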
+
+/**
+ * ANeuralNetworksOperandType describes the type of an operand.
+ * This structure is used to describe both scalars and tensors.
+ */
+typedef struct ANeuralNetworksOperandType {
+ /** The data type, e.g. ANEURALNETWORKS_FLOAT32. */
+ int32_t type;
+ /** The number of dimensions. It should be 0 for scalars. */
+ uint32_t dimensionCount;
+ /** The dimensions of the tensor. It should be nullptr for scalars. */
+ const uint32_t* dimensions;
+ /** These two fields are only used for quantized tensors.
+ * They should be zero for scalars and non-fixed point tensors.
+ * The dequantized value of each entry is (value - zeroPoint) * scale.
+ */
+ float scale;
+ int32_t zeroPoint;
+} ANeuralNetworksOperandType;
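+
+/*
+ * Illustration only (not part of the API): describing a quantized tensor
+ * and a FLOAT32 scalar with this structure. The shape and quantization
+ * parameters are assumptions of this example.
+ *
+ *   uint32_t dims[4] = {1, 28, 28, 1};
+ *   ANeuralNetworksOperandType quantized = {
+ *       .type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
+ *       .dimensionCount = 4,
+ *       .dimensions = dims,
+ *       .scale = 0.5f,
+ *       .zeroPoint = 128,
+ *   };
+ *   // A stored entry of 130 dequantizes to (130 - 128) * 0.5 = 1.0.
+ *
+ *   ANeuralNetworksOperandType scalar = {
+ *       .type = ANEURALNETWORKS_FLOAT32,  // scalar: no dimensions
+ *       .dimensionCount = 0,
+ *       .dimensions = NULL,
+ *       .scale = 0.0f,
+ *       .zeroPoint = 0,
+ *   };
+ */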
+
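+/**
+ * The type of an operation, i.e. one of the {@link OperationCode} values.
+ */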
+typedef int32_t ANeuralNetworksOperationType;
+
+/**
+ * ANeuralNetworksEvent is an opaque type that represents an event
+ * that will be signaled once an execution completes.
+ */
+typedef struct ANeuralNetworksEvent ANeuralNetworksEvent;
+
+
+/**
+ * Creates a shared memory object from a file descriptor.
+ *
+ * The shared memory is backed by a file descriptor via mmap.
+ * See {@link ANeuralNetworksMemory} for a description on how to use
+ * this shared memory.
+ *
+ * @param size The requested size in bytes.
+ * Must not be larger than the file size.
+ * @param protect The desired memory protection for the mapping.
+ * It is either PROT_NONE or the bitwise OR of one or
+ * more of the following flags: PROT_READ, PROT_WRITE.
+ * @param fd The requested file descriptor.
+ * The file descriptor has to be mmap-able. The file
+ * descriptor will be duplicated.
+ * @param offset The offset to the beginning of the file of the area to map.
+ * The offset has to be aligned to a page size.
+ * @param memory The memory object to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
+ */
+int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset,
+ ANeuralNetworksMemory** memory);
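+
+/*
+ * Illustration only (not part of the API): mapping a read-only weights file
+ * into an {@link ANeuralNetworksMemory}. The file name is an assumption of
+ * this example; open/fstat/close and PROT_READ are standard POSIX.
+ *
+ *   #include <fcntl.h>     // open
+ *   #include <sys/mman.h>  // PROT_READ
+ *   #include <sys/stat.h>  // fstat
+ *   #include <unistd.h>    // close
+ *
+ *   int fd = open("weights.bin", O_RDONLY);
+ *   struct stat st;
+ *   fstat(fd, &st);
+ *
+ *   ANeuralNetworksMemory* memory = NULL;
+ *   ANeuralNetworksMemory_createFromFd((size_t) st.st_size, PROT_READ,
+ *                                      fd, 0, &memory);
+ *   close(fd);  // safe: the descriptor is duplicated (see above)
+ *   // ... use with ANeuralNetworksModel_setOperandValueFromMemory ...
+ *   ANeuralNetworksMemory_free(memory);
+ */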
+
+/**
+ * Delete a memory object.
+ *
+ * Destroys the object used by the run time to keep track of the memory.
+ * This will free the underlying actual memory if no other code has open
+ * handles to this memory.
+ *
+ * @param memory The memory object to be freed.
+ */
+void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory);
+
+/**
+ * Create an empty {@link ANeuralNetworksModel}.
+ *
+ * <p>This only creates the object. Computation is performed once
+ * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ *
+ * The model should be constructed with calls to
+ * {@link ANeuralNetworksModel_addOperation} and
+ * {@link ANeuralNetworksModel_addOperand}
+ *
+ * <p>{@link ANeuralNetworksModel_finish} should be called once the model
+ * has been fully constructed.</p>
+ *
+ * <p>{@link ANeuralNetworksModel_free} should be called once the model
+ * is no longer needed.</p>
+ *
+ * @param model The {@link ANeuralNetworksModel} to be created.
+ * Set to NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_create(ANeuralNetworksModel** model);
+
+/**
+ * Destroy a model.
+ *
+ * The model need not have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksModel_free(ANeuralNetworksModel* model);
+
+/**
+ * Indicate that we have finished modifying a model. Required before
+ * calling {@link ANeuralNetworksCompilation_create}.
+ *
+ * An application is responsible for making sure that no other thread uses
+ * the model at the same time.
+ *
+ * This function must only be called once for a given model.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_finish(ANeuralNetworksModel* model);
+
+/**
+ * Add an operand to a model.
+ *
+ * The order in which the operands are added is important. The first one added
+ * to a model will have the index value 0, the second 1, etc. These indexes are
+ * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
+ * {@link ANeuralNetworksExecution_setInput},
+ * {@link ANeuralNetworksExecution_setInputFromMemory},
+ * {@link ANeuralNetworksExecution_setOutput},
+ * {@link ANeuralNetworksExecution_setOutputFromMemory} and
+ * {@link ANeuralNetworksModel_setOperandValue}.
+ *
+ * To build a model that can accommodate inputs of various sizes, as you may want
+ * to do for a CNN, set the size of the dimensions that will vary at run time to 0.
+ * If you do so, provide the full dimensions when calling
+ * {@link ANeuralNetworksExecution_setInput} or {@link ANeuralNetworksExecution_setInputFromMemory}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param type The {@link ANeuralNetworksOperandType} that describes the shape
+ * of the operand.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_addOperand(ANeuralNetworksModel* model,
+ const ANeuralNetworksOperandType* type);
+
+/**
+ * Sets an operand to a constant value.
+ *
+ * Values of length smaller or equal to
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}
+ * are immediately copied into the model.
+ *
+ * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES},
+ * a pointer to the buffer is stored within the model. The application is responsible
+ * for not changing the content of this region until all executions using this model
+ * have completed. As the data may be copied during processing, modifying the data
+ * after this call yields undefined results.
+ *
+ * For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory}
+ * is likely to be more efficient.
+ *
+ * To indicate that an optional operand should be considered missing,
+ * pass nullptr for buffer and 0 for length.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param buffer A pointer to the data to use.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, int32_t index,
+ const void* buffer, size_t length);
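+
+/*
+ * Illustration only (not part of the API): setting a small constant
+ * operand, here an INT32 scalar holding a {@link FuseCode} value. Since
+ * sizeof(int32_t) is well below
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}, the value
+ * is copied into the model immediately and "activation" may go out of
+ * scope after the call. The operand index 2 is an assumption of this
+ * example.
+ *
+ *   int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+ *   ANeuralNetworksModel_setOperandValue(model, 2, &activation,
+ *                                        sizeof(activation));
+ */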
+
+/**
+ * Sets an operand to a value stored in a memory object.
+ *
+ * The content of the memory is not copied. A reference to that memory is stored
+ * inside the model. The application is responsible for not changing the content
+ * of the memory region until all executions using this model have completed.
+ * As the data may be copied during processing, modifying the data after this call
+ * yields undefined results.
+ *
+ * To indicate that an optional operand should be considered missing,
+ * use {@link ANeuralNetworksModel_setOperandValue} instead, passing nullptr for buffer.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @param model The model to be modified.
+ * @param index The index of the model operand we're setting.
+ * @param memory The memory containing the data.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index,
+ const ANeuralNetworksMemory* memory,
+ size_t offset, size_t length);
+
+/**
+ * Add an operation to a model.
+ *
+ * @param model The model to be modified.
+ * @param type The type of the operation.
+ * @param inputCount The number of entries in the inputs array.
+ * @param inputs An array of indexes identifying each operand.
+ * @param outputCount The number of entries in the outputs array.
+ * @param outputs An array of indexes identifying each operand.
+ *
+ * The operands specified by inputs and outputs must have been
+ * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model,
+ ANeuralNetworksOperationType type, uint32_t inputCount,
+ const uint32_t* inputs, uint32_t outputCount,
+ const uint32_t* outputs);
+
+/**
+ * Specifies which operands will be the model's inputs and outputs.
+ *
+ * An operand cannot be used for both input and output. Doing so will
+ * return an error.
+ *
+ * @param model The model to be modified.
+ * @param inputCount The number of entries in the inputs array.
+ * @param inputs An array of indexes identifying the input operands.
+ * @param outputCount The number of entries in the outputs array.
+ * @param outputs An array of indexes identifying the output operands.
+ *
+ * The operands specified by inputs and outputs must have been
+ * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been
+ * called will return an error.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount,
+ const uint32_t* inputs, uint32_t outputCount,
+ const uint32_t* outputs);
+
+/**
+ * Create a {@link ANeuralNetworksCompilation} to compile the given model.
+ *
+ * <p>This only creates the object. Compilation is only performed once
+ * {@link ANeuralNetworksCompilation_finish} is invoked.</p>
+ *
+ * <p>{@link ANeuralNetworksCompilation_finish} should be called once
+ * all desired properties have been set on the compilation.</p>
+ *
+ * <p>{@link ANeuralNetworksCompilation_free} should be called once the compilation
+ * is no longer needed.</p>
+ *
+ * <p>The provided model must outlive the compilation.</p>
+ *
+ * The model must already have been finished by a call to
+ * {@link ANeuralNetworksModel_finish}.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param model The {@link ANeuralNetworksModel} to be compiled.
+ * @param compilation The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the model is invalid.
+ */
+int ANeuralNetworksCompilation_create(ANeuralNetworksModel* model,
+ ANeuralNetworksCompilation** compilation);
+
+/**
+ * Destroy a compilation.
+ *
+ * The compilation need not have been finished by a call to
+ * {@link ANeuralNetworksCompilation_finish}.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksCompilation_free(ANeuralNetworksCompilation* compilation);
+
+/**
+ * Sets the execution preference.
+ *
+ * <p>Provides guidance to the runtime when trade-offs are possible.</p>
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param preference Either {@link ANEURALNETWORKS_PREFER_LOW_POWER},
+ * {@link ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER}, or
+ * {@link ANEURALNETWORKS_PREFER_SUSTAINED_SPEED}.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksCompilation_setPreference(ANeuralNetworksCompilation* compilation,
+ int32_t preference);
+
+/**
+ * Indicate that we have finished modifying a compilation. Required before
+ * calling {@link ANeuralNetworksExecution_create}.
+ *
+ * An application is responsible for making sure that no other thread uses
+ * the compilation at the same time.
+ *
+ * This function must only be called once for a given compilation.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded usage.
+ *
+ * @param compilation The compilation to be finished.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation* compilation);
+
+/**
+ * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
+ * This only creates the object. Computation is only performed once
+ * {@link ANeuralNetworksExecution_startCompute} is invoked.
+ *
+ * <p>The provided compilation must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
+ * @param execution The newly created object or NULL if unsuccessful.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+ * if the compilation is invalid.
+ */
+int ANeuralNetworksExecution_create(ANeuralNetworksCompilation* compilation,
+ ANeuralNetworksExecution** execution);
+
+/**
+ * Destroy an execution.
+ *
+ * <p>If called on an execution for which
+ * {@link ANeuralNetworksExecution_startCompute} has been called, the
+ * function will return immediately but will mark the execution to be deleted
+ * once the computation completes. The related {@link ANeuralNetworksEvent}
+ * will be signaled and the {@link ANeuralNetworksEvent_wait} will return
+ * ANEURALNETWORKS_ERROR_DELETED.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be destroyed. Passing NULL is acceptable and
+ * results in no operation.
+ */
+void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution);
+
+/**
+ * Associate a user buffer with an input of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided buffer must outlive the execution.</p>
+ *
+ * If the input is optional, you can indicate that it is omitted by
+ * passing nullptr for buffer and 0 for length.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the input argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This should be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other properties of the type must be the same as
+ * specified in the model. If the type is the same as specified
+ * when the model was built, NULL can be passed.
+ * @param buffer The buffer containing the data.
+ * @param length The length in bytes of the buffer.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the input.
+ */
+int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type, const void* buffer,
+ size_t length);
+
+/**
+ * Associate part of a memory object with an input of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * <p>The provided memory must outlive the execution.</p>
+ *
+ * If the input is optional, you can indicate that it is omitted by
+ * using {@link ANeuralNetworksExecution_setInput} instead, passing nullptr for buffer
+ * and 0 for length.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the input argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param memory The memory containing the data.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The size in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the input.
+ */
+int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type,
+ const ANeuralNetworksMemory* memory, size_t offset,
+ size_t length);
+
+/**
+ * Associate a user buffer with an output of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * If the output is optional, you can indicate that it is omitted by
+ * passing nullptr for buffer and 0 for length.
+ *
+ * <p>The provided buffer must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the output argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param buffer The buffer where the data is to be written.
+ * @param length The length in bytes of the buffer.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the output.
+ */
+int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type, void* buffer,
+ size_t length);
+
+/**
+ * Associate part of a memory object with an output of the model of the
+ * {@link ANeuralNetworksExecution}.
+ *
+ * If the output is optional, you can indicate that it is omitted by
+ * using {@link ANeuralNetworksExecution_setOutput} instead, passing nullptr for buffer
+ * and 0 for length.
+ *
+ * <p>The provided memory must outlive the execution.</p>
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be modified.
+ * @param index The index of the output argument we are setting. It is
+ * an index into the lists passed to
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
+ * the index associated with {@link ANeuralNetworksModel_addOperand}.
+ * @param type The type of the operand. This can be used to specify the
+ * dimensions that were set to 0 when the operand was added to the
+ * model. All other values must be the same as specified in the
+ * model. If the type is the same as specified when the model
+ * was built, NULL can be passed.
+ * @param memory The memory where the data is to be stored.
+ * @param offset This specifies the location of the data within the memory.
+ * The offset is in bytes from the start of memory.
+ * @param length The length in bytes of the data value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if the
+ * name is not recognized or the buffer is too small for the output.
+ */
+int ANeuralNetworksExecution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index,
+ const ANeuralNetworksOperandType* type,
+ const ANeuralNetworksMemory* memory, size_t offset,
+ size_t length);
+
+/**
+ * Schedule evaluation of the execution.
+ *
+ * <p>Schedules evaluation of the execution. Once the model has been
+ * applied and the outputs are ready to be consumed, the returned event will be
+ * signaled. Use {@link ANeuralNetworksEvent_wait} to wait for that event.
+ * </p>
+ *
+ * Multiple executions can be scheduled and evaluated concurrently. The
+ * runtime makes no guarantee on the ordering of completion of
+ * executions. If it's important to the application, the application
+ * should enforce the ordering by using
+ * {@link ANeuralNetworksEvent_wait}.
+ *
+ * ANeuralNetworksEvent_wait must be called to reclaim the resources used
+ * by the execution.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @param execution The execution to be scheduled and executed.
+ * @param event The event that will be signaled on completion. event is set to
+ * NULL if there's an error.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution* execution,
+ ANeuralNetworksEvent** event);
+
+/**
+ * Waits until the execution completes.
+ *
+ * More than one thread can wait on an event. When the execution completes,
+ * all threads will be released.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+ */
+int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event);
+
+/**
+ * Destroys the event.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ */
+void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event);
+
+__END_DECLS
+
+#endif // __ANDROID_API__ >= 27
+
+#endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H
+
+/** @} */
diff --git a/include/kernel/acl/Concatenation.h b/include/kernel/acl/Concatenation.h
new file mode 100644
index 0000000..3e51851
--- /dev/null
+++ b/include/kernel/acl/Concatenation.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_CONCATENATION_H__
+#define __NNFW_KERNEL_ACL_CONCATENATION_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+ const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+ const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_CONCATENATION_H__
diff --git a/include/kernel/acl/Conv2D.h b/include/kernel/acl/Conv2D.h
new file mode 100644
index 0000000..5f2cd4d
--- /dev/null
+++ b/include/kernel/acl/Conv2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_CONV2D_H__
+#define __NNFW_KERNEL_ACL_CONV2D_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_CONV2D_H__
diff --git a/include/kernel/acl/DepthwiseConv2D.h b/include/kernel/acl/DepthwiseConv2D.h
new file mode 100644
index 0000000..b6576d2
--- /dev/null
+++ b/include/kernel/acl/DepthwiseConv2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_DEPTHWISECONV2D_H__
+#define __NNFW_KERNEL_ACL_DEPTHWISECONV2D_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_DEPTHWISECONV2D_H__
diff --git a/include/kernel/acl/FullyConnected.h b/include/kernel/acl/FullyConnected.h
new file mode 100644
index 0000000..9695d63
--- /dev/null
+++ b/include/kernel/acl/FullyConnected.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_FULLYCONNECTED_H__
+#define __NNFW_KERNEL_ACL_FULLYCONNECTED_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_FULLYCONNECTED_H__
diff --git a/include/kernel/acl/Pooling.h b/include/kernel/acl/Pooling.h
new file mode 100644
index 0000000..0205f99
--- /dev/null
+++ b/include/kernel/acl/Pooling.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_POOLING_H__
+#define __NNFW_KERNEL_ACL_POOLING_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_POOLING_H__
diff --git a/include/kernel/acl/Reshape.h b/include/kernel/acl/Reshape.h
new file mode 100644
index 0000000..418db73
--- /dev/null
+++ b/include/kernel/acl/Reshape.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_RESHAPE_H__
+#define __NNFW_KERNEL_ACL_RESHAPE_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+ void* outputData, const nnfw::rt::Shape& outputShape);
+namespace neon {
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+ void* outputData, const nnfw::rt::Shape& outputShape);
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_RESHAPE_H__
diff --git a/include/kernel/acl/Softmax.h b/include/kernel/acl/Softmax.h
new file mode 100644
index 0000000..d1f7daf
--- /dev/null
+++ b/include/kernel/acl/Softmax.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_SOFTMAX_H__
+#define __NNFW_KERNEL_ACL_SOFTMAX_H__
+
+#include <OperationsUtils.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float beta,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+namespace neon {
+
+bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float beta,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+} // namespace neon
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_SOFTMAX_H__
diff --git a/include/kernel/acl/nnfw_kernel_acl.h b/include/kernel/acl/nnfw_kernel_acl.h
new file mode 100644
index 0000000..7197502
--- /dev/null
+++ b/include/kernel/acl/nnfw_kernel_acl.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_H__
+#define __NNFW_KERNEL_ACL_H__
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
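+// Usage sketch (illustrative): call Initialize() once before invoking any
+// kernel in this library.
+//
+//   nnfw::kernel::acl::Initialize();
+//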
+void Initialize(void);
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_H__
diff --git a/include/support/nnapi/feature/Reader.h b/include/support/nnapi/feature/Reader.h
new file mode 100644
index 0000000..bf632f2
--- /dev/null
+++ b/include/support/nnapi/feature/Reader.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_NNAPI_FEATURE_READER_H__
+#define __NNFW_SUPPORT_NNAPI_FEATURE_READER_H__
+
+#include "support/nnapi/feature/Utils.h"
+
+#include "util/feature/Shape.h"
+#include "util/feature/Reader.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace nnapi
+{
+namespace feature
+{
+
+template<typename T> class Reader;
+
+template<> class Reader<float> : public nnfw::util::feature::Reader<float>
+{
+public:
+ Reader(const nnfw::util::feature::Shape &shape, const float *base)
+ : _shape{shape}, _base{base}
+ {
+ // DO NOTHING
+ }
+
+public:
+ float at(uint32_t ch, uint32_t row, uint32_t col) const override
+ {
+ return *(_base + indexOf(_shape, ch, row, col));
+ }
+
+private:
+ nnfw::util::feature::Shape _shape;
+ const float *_base;
+};
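+
+// Usage sketch (illustrative): wrap a raw NNAPI (NHWC) float buffer for
+// per-element reads, given a matching 'shape' and base pointer:
+//
+//   Reader<float> reader{shape, base};
+//   float v = reader.at(/*ch=*/0, /*row=*/1, /*col=*/2);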
+
+} // namespace feature
+} // namespace nnapi
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_NNAPI_FEATURE_READER_H__
diff --git a/include/support/nnapi/feature/Utils.h b/include/support/nnapi/feature/Utils.h
new file mode 100644
index 0000000..e666f54
--- /dev/null
+++ b/include/support/nnapi/feature/Utils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_NNAPI_FEATURE_UTILS_H__
+#define __NNFW_SUPPORT_NNAPI_FEATURE_UTILS_H__
+
+#include "util/feature/Shape.h"
+
+#include <cstdint>
+
+namespace nnfw
+{
+namespace support
+{
+namespace nnapi
+{
+namespace feature
+{
+
+uint32_t indexOf(const nnfw::util::feature::Shape &shape, uint32_t ch, uint32_t row, uint32_t col);
+
+} // namespace feature
+} // namespace nnapi
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_NNAPI_FEATURE_UTILS_H__
diff --git a/include/support/tflite/Diff.h b/include/support/tflite/Diff.h
new file mode 100644
index 0000000..b17c931
--- /dev/null
+++ b/include/support/tflite/Diff.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_COMPARE_H__
+#define __NNFW_SUPPORT_TFLITE_COMPARE_H__
+
+#include "tensorflow/contrib/lite/interpreter.h"
+
+#include "util/tensor/Index.h"
+
+#include "support/tflite/TensorView.h"
+
+#include <functional>
+#include <vector>
+
+// NOTE The code below is subject to change.
+// TODO Introduce namespaces
+struct TfLiteTensorDiff
+{
+ nnfw::util::tensor::Index index;
+ float expected;
+ float obtained;
+
+ TfLiteTensorDiff(const nnfw::util::tensor::Index &i) : index(i)
+ {
+ // DO NOTHING
+ }
+};
+
+class TfLiteTensorComparator
+{
+public:
+ TfLiteTensorComparator(const std::function<bool (float lhs, float rhs)> &fn) : _compare_fn{fn}
+ {
+ // DO NOTHING
+ }
+
+public:
+ struct Observer
+ {
+ virtual void notify(const nnfw::util::tensor::Index &index, float expected, float obtained) = 0;
+ };
+
+public:
+ // NOTE The Observer should outlive the comparator
+ std::vector<TfLiteTensorDiff> compare(const nnfw::support::tflite::TensorView<float> &expected,
+ const nnfw::support::tflite::TensorView<float> &obtained,
+ Observer *observer = nullptr) const;
+
+private:
+ std::function<bool (float lhs, float rhs)> _compare_fn;
+};
+
+class TfLiteInterpMatchApp
+{
+public:
+ TfLiteInterpMatchApp(const TfLiteTensorComparator &comparator)
+ : _verbose{false}, _comparator(comparator)
+ {
+ // DO NOTHING
+ }
+
+public:
+ int &verbose(void) { return _verbose; }
+
+private:
+ int _verbose;
+
+public:
+ bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const;
+
+private:
+ const TfLiteTensorComparator &_comparator;
+};
+
+#include "support/tflite/interp/Builder.h"
+
+#include <random>
+
+// For NNAPI testing
+struct RandomTestParam
+{
+ int verbose;
+ int tolerance;
+};
+
+class RandomTestRunner
+{
+public:
+ RandomTestRunner(int seed, const RandomTestParam &param)
+ : _rand{seed}, _param{param}
+ {
+ // DO NOTHING
+ }
+
+public:
+ // NOTE This method updates '_rand'
+ // Returns 0 if the test succeeds
+ int run(const nnfw::support::tflite::interp::Builder &builder);
+
+private:
+ std::minstd_rand _rand;
+ const RandomTestParam _param;
+};
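+
+// Usage sketch (illustrative): run a randomized pure-vs-NNAPI comparison
+// with a fixed seed; 'builder' is any interp::Builder implementation.
+//
+//   RandomTestParam param{/*verbose=*/0, /*tolerance=*/1};
+//   RandomTestRunner runner{/*seed=*/0, param};
+//   int result = runner.run(builder);  // 0 on success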
+
+#endif // __NNFW_SUPPORT_TFLITE_COMPARE_H__
diff --git a/include/support/tflite/FeatureView.h b/include/support/tflite/FeatureView.h
new file mode 100644
index 0000000..3a7d75e
--- /dev/null
+++ b/include/support/tflite/FeatureView.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_FEATURE_VIEW_H__
+#define __NNFW_SUPPORT_TFLITE_FEATURE_VIEW_H__
+
+#include "tensorflow/contrib/lite/interpreter.h"
+
+#include "support/tflite/InputIndex.h"
+#include "support/tflite/OutputIndex.h"
+
+#include "util/feature/Shape.h"
+#include "util/feature/Reader.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+
+template<typename T> class FeatureView;
+
+template<> class FeatureView<float> : public nnfw::util::feature::Reader<float>
+{
+public:
+ FeatureView(::tflite::Interpreter &interp, const InputIndex &index);
+ FeatureView(::tflite::Interpreter &interp, const OutputIndex &index);
+
+public:
+ float at(uint32_t ch, uint32_t row, uint32_t col) const;
+ float &at(uint32_t ch, uint32_t row, uint32_t col);
+
+private:
+ uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const
+ {
+ uint32_t res = 0;
+
+ // TensorFlow Lite assumes NHWC ordering for tensors
+ res += row * _shape.W * _shape.C;
+ res += col * _shape.C;
+ res += ch;
+
+ return res;
+ }
+
+private:
+ nnfw::util::feature::Shape _shape;
+ float *_base;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_FEATURE_VIEW_H__
diff --git a/include/support/tflite/InputIndex.h b/include/support/tflite/InputIndex.h
new file mode 100644
index 0000000..c3ed891
--- /dev/null
+++ b/include/support/tflite/InputIndex.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_INPUT_INDEX_H__
+#define __NNFW_SUPPORT_TFLITE_INPUT_INDEX_H__
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+
+class InputIndex
+{
+public:
+ InputIndex(int index) : _index(index)
+ {
+ // DO NOTHING
+ }
+
+public:
+ int asInt(void) const { return _index; }
+
+private:
+ int _index;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_INPUT_INDEX_H__
diff --git a/include/support/tflite/OutputIndex.h b/include/support/tflite/OutputIndex.h
new file mode 100644
index 0000000..be6556c
--- /dev/null
+++ b/include/support/tflite/OutputIndex.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_OUTPUT_INDEX_H__
+#define __NNFW_SUPPORT_TFLITE_OUTPUT_INDEX_H__
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+
+class OutputIndex
+{
+public:
+ OutputIndex(int index) : _index(index)
+ {
+ // DO NOTHING
+ }
+
+public:
+ int asInt(void) const { return _index; }
+
+private:
+ int _index;
+};
+
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_OUTPUT_INDEX_H__
diff --git a/include/support/tflite/TensorUtils.h b/include/support/tflite/TensorUtils.h
new file mode 100644
index 0000000..815cfcd
--- /dev/null
+++ b/include/support/tflite/TensorUtils.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_TENSOR_UTILS_H__
+#define __NNFW_SUPPORT_TFLITE_TENSOR_UTILS_H__
+
+#include <tensorflow/contrib/lite/context.h>
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+
+inline bool isFloatTensor(const TfLiteTensor *tensor)
+{
+ return tensor->type == kTfLiteFloat32;
+}
+
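+// Here a "feature" tensor means a rank-4 (NHWC) tensor whose batch
+// dimension is 1.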
+inline bool isFeatureTensor(const TfLiteTensor *tensor)
+{
+ return (tensor->dims->size == 4) && (tensor->dims->data[0] == 1);
+}
+
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_TENSOR_UTILS_H__
diff --git a/include/support/tflite/TensorView.h b/include/support/tflite/TensorView.h
new file mode 100644
index 0000000..35c90a3
--- /dev/null
+++ b/include/support/tflite/TensorView.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_TENSOR_VIEW_H__
+#define __NNFW_SUPPORT_TFLITE_TENSOR_VIEW_H__
+
+#include "tensorflow/contrib/lite/interpreter.h"
+
+#include "util/tensor/Shape.h"
+#include "util/tensor/Index.h"
+#include "util/tensor/Reader.h"
+#include "util/tensor/NonIncreasingStride.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+
+template<typename T> class TensorView;
+
+template<> class TensorView<float> final : public nnfw::util::tensor::Reader<float>
+{
+public:
+ TensorView(const nnfw::util::tensor::Shape &shape, float *base);
+
+public:
+ const nnfw::util::tensor::Shape &shape(void) const { return _shape; }
+
+public:
+ float at(const nnfw::util::tensor::Index &index) const override;
+ float &at(const nnfw::util::tensor::Index &index);
+
+private:
+ nnfw::util::tensor::Shape _shape;
+
+public:
+ float *_base;
+ nnfw::util::tensor::NonIncreasingStride _stride;
+
+public:
+ // TODO Introduce Operand ID class
+ static TensorView<float> make(::tflite::Interpreter &interp, int operand_id);
+};
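+
+// Usage sketch (illustrative): view an interpreter operand as a float
+// tensor; 'ind' stands for an nnfw::util::tensor::Index you construct
+// for the element of interest.
+//
+//   auto view = TensorView<float>::make(interp, /*operand_id=*/0);
+//   float v = view.at(ind);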
+
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_TENSOR_VIEW_H__
diff --git a/include/support/tflite/interp/Builder.h b/include/support/tflite/interp/Builder.h
new file mode 100644
index 0000000..4a5a2f2
--- /dev/null
+++ b/include/support/tflite/interp/Builder.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_INTERP_BUILDER_H__
+#define __NNFW_SUPPORT_TFLITE_INTERP_BUILDER_H__
+
+#include <tensorflow/contrib/lite/interpreter.h>
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+namespace interp
+{
+
+struct Builder
+{
+ virtual ~Builder() = default;
+
+ virtual std::unique_ptr<::tflite::Interpreter> build(void) const = 0;
+};
+
+} // namespace interp
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_INTERP_BUILDER_H__
diff --git a/include/support/tflite/interp/FlatBufferBuilder.h b/include/support/tflite/interp/FlatBufferBuilder.h
new file mode 100644
index 0000000..dab151d
--- /dev/null
+++ b/include/support/tflite/interp/FlatBufferBuilder.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
+#define __NNFW_SUPPORT_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
+
+#include <tensorflow/contrib/lite/model.h>
+
+#include "support/tflite/interp/Builder.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+namespace interp
+{
+
+class FlatBufferBuilder final : public Builder
+{
+public:
+ FlatBufferBuilder(const ::tflite::FlatBufferModel &model) : _model{model}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::unique_ptr<::tflite::Interpreter> build(void) const override;
+
+private:
+ const ::tflite::FlatBufferModel &_model;
+};
+
+} // namespace interp
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
diff --git a/include/support/tflite/interp/FunctionBuilder.h b/include/support/tflite/interp/FunctionBuilder.h
new file mode 100644
index 0000000..1ac5918
--- /dev/null
+++ b/include/support/tflite/interp/FunctionBuilder.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SUPPORT_TFLITE_INTERP_FUNCTION_BUILDER_H__
+#define __NNFW_SUPPORT_TFLITE_INTERP_FUNCTION_BUILDER_H__
+
+#include <tensorflow/contrib/lite/model.h>
+
+#include "support/tflite/interp/Builder.h"
+
+namespace nnfw
+{
+namespace support
+{
+namespace tflite
+{
+namespace interp
+{
+
+class FunctionBuilder final : public Builder
+{
+public:
+ using SetupFunc = std::function<void (::tflite::Interpreter &)>;
+
+public:
+ FunctionBuilder(const SetupFunc &fn) : _fn{fn}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::unique_ptr<::tflite::Interpreter> build(void) const override;
+
+private:
+ SetupFunc _fn;
+};
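+
+// Usage sketch (illustrative): build an interpreter from an ad-hoc setup
+// function; the lambda body (here elided) would add tensors and nodes.
+//
+//   FunctionBuilder builder{[](::tflite::Interpreter &interp) {
+//     /* configure 'interp' here */
+//   }};
+//   auto interp = builder.build();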
+
+} // namespace interp
+} // namespace tflite
+} // namespace support
+} // namespace nnfw
+
+#endif // __NNFW_SUPPORT_TFLITE_INTERP_FUNCTION_BUILDER_H__
diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt
new file mode 100644
index 0000000..8865a92
--- /dev/null
+++ b/libs/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_subdirectory(util)
+if(BUILD_NN_RUNTIME)
+ add_subdirectory(kernel)
+endif(BUILD_NN_RUNTIME)
+add_subdirectory(support)
diff --git a/libs/kernel/CMakeLists.txt b/libs/kernel/CMakeLists.txt
new file mode 100644
index 0000000..7da5460
--- /dev/null
+++ b/libs/kernel/CMakeLists.txt
@@ -0,0 +1,3 @@
+if(${TARGET_ARCH_BASE} STREQUAL "arm" OR ${TARGET_ARCH_BASE} STREQUAL "aarch64")
+ add_subdirectory(acl)
+endif()
diff --git a/libs/kernel/acl/CMakeLists.txt b/libs/kernel/acl/CMakeLists.txt
new file mode 100644
index 0000000..8f0486e
--- /dev/null
+++ b/libs/kernel/acl/CMakeLists.txt
@@ -0,0 +1,94 @@
+set(LIB_KERNELACL kernelacl)
+set(LIB_KERNELACL_TEST kernelacl_test)
+
+# TODO remove this when default goes to c++14
+if(CMAKE_VERSION VERSION_LESS 3.1.0)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+else(CMAKE_VERSION VERSION_LESS 3.1.0)
+ set(CMAKE_CXX_STANDARD 14)
+endif(CMAKE_VERSION VERSION_LESS 3.1.0)
+
+# runtime information
+set(PATH_RUNTIME_NN ${CMAKE_SOURCE_DIR}/runtimes/nn)
+set(RUNTIME_INCLUDES ${PATH_RUNTIME_NN}/common/include
+ ${PATH_RUNTIME_NN}/runtime/include
+ ${PATH_RUNTIME_NN}/depend/hal/include
+ ${PATH_RUNTIME_NN}/depend/libhidl/base/include
+ ${PATH_RUNTIME_NN}/depend/libcutils/include
+ ${PATH_RUNTIME_NN}/depend/libutils/include
+ ${PATH_RUNTIME_NN}/depend/android-base/include
+ )
+
+# common
+link_directories(${CMAKE_INSTALL_PREFIX}/lib)
+
+# kernel library
+set(KERNELACL_SRCS "src/Init_acl.cpp"
+ "src/IO_accessor.cpp"
+ "src/shape.cpp"
+ "src/support.cpp"
+ "src/cl/Conv2D.cpp"
+ "src/cl/DepthwiseConv2D.cpp"
+ "src/cl/FullyConnected.cpp"
+ "src/cl/Pooling.cpp"
+ "src/cl/Reshape.cpp"
+ "src/cl/Softmax.cpp"
+ "src/cl/Concatenation.cpp"
+ "src/neon/Conv2D.cpp"
+ "src/neon/DepthwiseConv2D.cpp"
+ "src/neon/FullyConnected.cpp"
+ "src/neon/Pooling.cpp"
+ "src/neon/Softmax.cpp"
+ "src/neon/Reshape.cpp"
+ "src/neon/Concatenation.cpp"
+ )
+
+add_library(${LIB_KERNELACL} SHARED ${KERNELACL_SRCS})
+target_include_directories(${LIB_KERNELACL} PUBLIC
+ ${NNFW_INCLUDE_DIR}
+ ${RUNTIME_INCLUDES}
+ ${NNFW_ACL_INCLUDES}
+ ${CMAKE_SOURCE_DIR}/include
+ )
+target_link_libraries(${LIB_KERNELACL} nnfw_support_nnapi)
+if (${TARGET_OS} STREQUAL "tizen")
+ target_link_libraries(${LIB_KERNELACL} nnfw_util ${NNFW_ACL_LIBS} OpenCL)
+else()
+ target_link_libraries(${LIB_KERNELACL} nnfw_util ${NNFW_ACL_LIBS})
+endif()
+install(TARGETS ${LIB_KERNELACL} DESTINATION lib)
+
+# kernel test executable
+set(KERNELACL_TEST_SRCS "src/util.cpp"
+ "src/gtest_env.cpp"
+ "src/cl/Conv2D.test.cpp"
+ "src/cl/DepthwiseConv2D.test.cpp"
+ "src/cl/FullyConnected.test.cpp"
+ "src/cl/Pooling.test.cpp"
+ "src/cl/Reshape.test.cpp"
+ "src/cl/Softmax.test.cpp"
+ "src/cl/Concatenation.test.cpp"
+ "src/neon/Conv2D.test.cpp"
+ "src/neon/DepthwiseConv2D.test.cpp"
+ "src/neon/FullyConnected.test.cpp"
+ "src/neon/Pooling.test.cpp"
+ "src/neon/Softmax.test.cpp"
+ "src/neon/Reshape.test.cpp"
+ "src/neon/Concatenation.test.cpp"
+ )
+
+add_executable(${LIB_KERNELACL_TEST} ${KERNELACL_TEST_SRCS})
+target_include_directories(${LIB_KERNELACL_TEST} PUBLIC
+ ${NNFW_INCLUDE_DIR}
+ ${RUNTIME_INCLUDES}
+ ${NNFW_ACL_INCLUDES}
+ )
+if (NOT ${TARGET_OS} STREQUAL "tizen")
+ add_dependencies(${LIB_KERNELACL_TEST} googletest)
+endif()
+target_link_libraries(${LIB_KERNELACL_TEST}
+ ${LIB_KERNELACL}
+ nnfw_util ${NNFW_ACL_LIBS}
+ ${NNFW_GTEST_LIBS}
+ )
+install(TARGETS ${LIB_KERNELACL_TEST} DESTINATION unittest)
diff --git a/libs/kernel/acl/src/CLUniqueTensor.h b/libs/kernel/acl/src/CLUniqueTensor.h
new file mode 100644
index 0000000..6844e45
--- /dev/null
+++ b/libs/kernel/acl/src/CLUniqueTensor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__
+#define __NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+class CLUniqueTensor
+{
+public:
+ CLUniqueTensor(const ::arm_compute::TensorInfo &info)
+ {
+ _tensor.allocator()->init(info);
+ }
+
+public:
+ // Both copy and move are not allowed
+ CLUniqueTensor(const CLUniqueTensor &) = delete;
+ CLUniqueTensor(CLUniqueTensor &&) = delete;
+
+public:
+ ~CLUniqueTensor()
+ {
+ _tensor.allocator()->free();
+ }
+
+public:
+ void allocate()
+ {
+ _tensor.allocator()->allocate();
+ }
+
+public:
+ ::arm_compute::CLTensor &ref(void) { return _tensor; }
+ ::arm_compute::CLTensor *ptr(void) { return &_tensor; }
+
+private:
+ ::arm_compute::CLTensor _tensor;
+};
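+
+// Usage sketch (illustrative): the tensor's backing memory is released
+// when the wrapper goes out of scope. Configure layers first, then
+// allocate, mirroring the kernels in this library:
+//
+//   CLUniqueTensor t{::arm_compute::TensorInfo(shape, ::arm_compute::Format::F32)};
+//   layer.configure(t.ptr(), ...);
+//   t.allocate();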
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif //__NNFW_KERNEL_ACL_CLUNIQUETENSOR_H__
diff --git a/libs/kernel/acl/src/DepthwiseConv2D.h b/libs/kernel/acl/src/DepthwiseConv2D.h
new file mode 100644
index 0000000..8af8d4f
--- /dev/null
+++ b/libs/kernel/acl/src/DepthwiseConv2D.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__
+#define __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include "shape.h"
+#include "IO_accessor.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace common {
+
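+// A sync_scheduler_f is invoked after every queued function has run, so
+// that a backend (e.g. the OpenCL scheduler) can wait for completion
+// before the output tensor is copied back to host memory.
+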
+typedef std::function<void (void)> sync_scheduler_f;
+
+template<class TensorT, class LayerT, class ActT>
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape,
+ sync_scheduler_f sync_scheduler) {
+ auto inputShapeACL = util::fromNNShape(inputShape);
+ auto weightsShapeACL = util::fromNNShape(filterShape);
+ auto biasShapeACL = util::fromNNShape(biasShape);
+ auto outputShapeACL = util::fromNNShape(outputShape);
+
+ TensorT input(arm_compute::TensorInfo(inputShapeACL, arm_compute::Format::F32));
+ TensorT weights(arm_compute::TensorInfo(weightsShapeACL, arm_compute::Format::F32));
+ TensorT bias(arm_compute::TensorInfo(biasShapeACL, arm_compute::Format::F32));
+ TensorT output(arm_compute::TensorInfo(outputShapeACL, arm_compute::Format::F32));
+
+ arm_compute::PadStrideInfo psinfo = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ auto l = std::make_shared<LayerT>();
+ l->configure(input.ptr(), weights.ptr(), bias.ptr(), output.ptr(), psinfo);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ fns.emplace_back(l);
+
+ util::insertFusedActivationLayer<TensorT, ActT>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ weights.allocate();
+
+ // TODO: Do we need a 2-D tensor accessor for the input feature?
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<WeightAccessor>(weights.ref(), filterData, filterShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ sync_scheduler();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace common
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_DEPTHWISECONV2D_COMMON_H__
diff --git a/libs/kernel/acl/src/DepthwiseConv2D.test.h b/libs/kernel/acl/src/DepthwiseConv2D.test.h
new file mode 100644
index 0000000..b2c8592
--- /dev/null
+++ b/libs/kernel/acl/src/DepthwiseConv2D.test.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/DepthwiseConv2D.h>
+
+// TODO: fix include path in CMakeFiles
+#include "util.h"
+
+#ifndef ACL_TEST
+#error "ACL_TEST should be defined first!"
+#endif // ACL_TEST
+
+#ifndef ACL_CORE_FUNC_NAME
+#error "ACL_CORE_FUNC_NAME should be defined first!"
+#endif // ACL_CORE_FUNC_NAME
+
+using namespace nnfw::kernel::acl;
+
+ACL_TEST(KernelACL_TC, dwise_conv2d_1) {
+ uint32_t input_n = 1;
+ uint32_t input_h = 3;
+ uint32_t input_w = 3;
+ uint32_t input_c = 1;
+ uint32_t filter_h = 3;
+ uint32_t filter_w = 3;
+ uint32_t filter_c = 1;
+ uint32_t out_h = 1;
+ uint32_t out_w = 1;
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t depth_multiplier = 1;
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weights({1, filter_h, filter_w, filter_c});
+ util::TensorWrapper bias({filter_c});
+ util::TensorWrapper output({1, out_h, out_w, filter_c});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = input_n;
+ uint32_t H = input_h;
+ uint32_t W = input_w;
+ uint32_t C = input_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ weights.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = 1;
+ uint32_t H = filter_h;
+ uint32_t W = filter_w;
+ uint32_t C = filter_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, out_h, out_w, filter_c});
+ expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 204.f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, dwise_conv2d_multi_channel) {
+ uint32_t input_n = 1;
+ uint32_t input_h = 3;
+ uint32_t input_w = 3;
+ uint32_t input_c = 3;
+ uint32_t filter_h = 3;
+ uint32_t filter_w = 3;
+ uint32_t filter_c = input_c;
+ uint32_t out_h = 1;
+ uint32_t out_w = 1;
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t depth_multiplier = 1;
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weights({1, filter_h, filter_w, filter_c});
+ util::TensorWrapper bias({filter_c});
+ util::TensorWrapper output({1, out_h, out_w, filter_c});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = input_n;
+ uint32_t H = input_h;
+ uint32_t W = input_w;
+ uint32_t C = input_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ weights.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = 1;
+ uint32_t H = filter_h;
+ uint32_t W = filter_w;
+ uint32_t C = filter_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, out_h, out_w, filter_c});
+ expected.initValue({
+ 1836.f,
+ 2061.f,
+ 2304.f
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, dwise_conv2d_inception_1) {
+ uint32_t input_n = 1;
+ uint32_t input_h = 112;
+ uint32_t input_w = 112;
+ uint32_t input_c = 32;
+ uint32_t filter_h = 3;
+ uint32_t filter_w = 3;
+ uint32_t filter_c = input_c;
+ uint32_t out_h = 112;
+ uint32_t out_w = 112;
+
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t depth_multiplier = 1;
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weights({1, filter_h, filter_w, filter_c});
+ util::TensorWrapper bias({filter_c});
+ util::TensorWrapper output({1, out_h, out_w, filter_c});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU6);
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return c;
+ });
+ weights.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return c;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, out_h, out_w, filter_c});
+ expected.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ float v = 9.f;
+ if( h == 0 || h == out_h-1 )
+ v -= 3.f;
+ if( w == 0 || w == out_w-1 )
+ v -= 3.f;
+
+ // four corners
+ if( (w == 0 && h == 0)
+ || (w == 0 && h == out_h-1)
+ || (w == out_w-1 && h == 0)
+ || (w == out_w-1 && h == out_h-1) )
+ v += 1.f;
+
+ // Assumption: negative values cannot appear here because the
+ // input and weights contain only non-negative numbers.
+ float ret = c*c*v;
+ return std::min(ret, 6.f);
+ });
+
+ EXPECT_EQ(output, expected);
+}
diff --git a/libs/kernel/acl/src/FullyConnected.h b/libs/kernel/acl/src/FullyConnected.h
new file mode 100644
index 0000000..5030a85
--- /dev/null
+++ b/libs/kernel/acl/src/FullyConnected.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__
+#define __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include "shape.h"
+#include "IO_accessor.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace common {
+
+typedef std::function<void (void)> sync_scheduler_f;
+
+template<class TensorT, class LayerT, class ActT>
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape,
+ sync_scheduler_f sync_scheduler) {
+
+ // NNAPI specification: https://developer.android.com/ndk/reference/group___neural_networks.html#ggaabbe492c60331b13038e39d4207940e0aaada7a3dbaf4676aba560c933ff610c5
+
+ // According to the NNAPI Specification,
+ // INPUT
+ // 1. input rank is up to 4.
+ // 2. if input rank > 2, it is flattened to rank 2 [batch_size, input_size]
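+ // For example, a rank-4 input of shape [N, H, W, C] is flattened
+ // below to [N, H*W*C] (see the rank-4 case of the switch).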
+ nnfw::rt::Shape flattenedInputShape = inputShape;
+ switch(inputShape.dimensions.size()) {
+ case 1:
+ {
+ assert("Need to be implemented." && 0);
+ break;
+ }
+ case 2:
+ {
+ // DO NOTHING.
+ break;
+ }
+ case 3:
+ {
+ assert("Need to be implemented." && 0);
+ break;
+ }
+ case 4:
+ {
+ auto N = inputShape.dimensions[0];
+ auto H = inputShape.dimensions[1];
+ auto W = inputShape.dimensions[2];
+ auto C = inputShape.dimensions[3];
+ flattenedInputShape.dimensions = {N, H*W*C};
+ break;
+ }
+ default:
+ assert(inputShape.dimensions.size() <= 4);
+ }
+ // Finally, flattenedInputShape is a 2D tensor.
+
+ // WEIGHTS is a 2D tensor
+ assert(weightsShape.dimensions.size() == 2);
+
+ // BIAS is a 1D tensor
+ assert(biasShape.dimensions.size() == 1);
+
+ // OUTPUT is a 2D tensor.
+ assert(outputShape.dimensions.size() == 2);
+
+ auto input_shape = util::fromNNShape(flattenedInputShape);
+ auto weights_shape = util::fromNNShape(weightsShape);
+ auto bias_shape = util::fromNNShape(biasShape);
+ auto output_shape = util::fromNNShape(outputShape);
+
+ assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ TensorT input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ TensorT output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+ TensorT bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+ TensorT weights(arm_compute::TensorInfo(weights_shape, arm_compute::Format::F32));
+
+ auto fc = std::make_shared<LayerT>();
+ fc->configure(input.ptr(), weights.ptr(), bias.ptr(), output.ptr());
+
+ fns.emplace_back(fc);
+
+ if (ANEURALNETWORKS_FUSED_RELU == activation)
+ {
+ auto relu_f = std::make_shared<ActT>();
+
+ const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+
+ // Do in-place update
+ relu_f->configure(output.ptr(), nullptr, relu_info);
+
+ fns.emplace_back(relu_f);
+ }
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ weights.allocate();
+
+ // TODO: Do we need a 2-D tensor accessor for the input feature?
+ TensorAccess<MatrixWeightAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<MatrixWeightAccessor>(weights.ref(), weightsData, weightsShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ sync_scheduler();
+
+ TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace common
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_FULLYCONNECTED_COMMON_H__
diff --git a/libs/kernel/acl/src/FullyConnected.test.h b/libs/kernel/acl/src/FullyConnected.test.h
new file mode 100644
index 0000000..01bbff8
--- /dev/null
+++ b/libs/kernel/acl/src/FullyConnected.test.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/FullyConnected.h>
+
+// TODO: fix include path in CMakeFiles
+#include "util.h"
+
+#ifndef ACL_TEST
+#error "ACL_TEST should be defined first!"
+#endif // ACL_TEST
+
+#ifndef ACL_CORE_FUNC_NAME
+#error "ACL_CORE_FUNC_NAME should be defined first!"
+#endif // ACL_CORE_FUNC_NAME
+
+using namespace nnfw::kernel::acl;
+using fullyConnectedFloat32T = bool (*)(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape);
+
+ACL_TEST(KernelACL_TC, fcFloat32_1) {
+
+ util::TensorWrapper input({1,1,1,100});
+ util::TensorWrapper weights({1,100});
+ util::TensorWrapper bias({1});
+ util::TensorWrapper output({1,1});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ weights.initValue([](uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,1});
+ expected.initValue([](uint32_t h, uint32_t w) {
+ return 100.f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_relu) {
+
+ util::TensorWrapper input({1,1,1,100});
+ util::TensorWrapper weights({1,100});
+ util::TensorWrapper bias({1});
+ util::TensorWrapper output({1,1});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ weights.initValue([](uint32_t h, uint32_t w) {
+ return -1.f;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weights.ptr<float>(), weights.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,1});
+ expected.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_conv_fc) {
+ uint32_t input_n = 1;
+ uint32_t input_c = 5;
+ uint32_t input_h = 4;
+ uint32_t input_w = 4;
+ uint32_t weight_n = 6;
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ util::TensorWrapper input({input_n, input_h, input_w, input_c});
+ util::TensorWrapper weight({weight_n, input_c*input_h*input_w});
+ util::TensorWrapper bias({weight_n});
+ util::TensorWrapper output({1, weight_n});
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ uint32_t N = input_n;
+ uint32_t H = input_h;
+ uint32_t W = input_w;
+ uint32_t C = input_c;
+
+ return n*H*W*C + h*W*C + w*C + c;
+ });
+
+ weight.initValue([&](uint32_t h, uint32_t w) {
+ uint32_t H = weight_n;
+ uint32_t W = input_c*input_h*input_w;
+
+ return h*W + w;
+ });
+
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weight.ptr<float>(), weight.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, weight_n});
+ expected.initValue({
+ 167480.f,
+ 420280.f,
+ 673080.f,
+ 925880.f,
+ 1178680.f,
+ 1431480.f});
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_fc_fc) {
+ uint32_t input_n = 6;
+ uint32_t weight_n = 6;
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ util::TensorWrapper input({1, input_n});
+ util::TensorWrapper weight({weight_n, input_n});
+ util::TensorWrapper bias({weight_n});
+ util::TensorWrapper output({1, weight_n});
+
+ input.initValue([&](uint32_t h, uint32_t w) {
+ // h is not used because h == 0 here.
+ return (float)w;
+ });
+
+ weight.initValue([&](uint32_t h, uint32_t w) {
+ uint32_t H = weight_n;
+ uint32_t W = input_n;
+
+ return (float)(h*W + w);
+ });
+
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weight.ptr<float>(), weight.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, weight_n});
+ expected.initValue({
+ 55.f,
+ 145.f,
+ 235.f,
+ 325.f,
+ 415.f,
+ 505.f,
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+ACL_TEST(KernelACL_TC, fcFloat32_inceptionv3) {
+
+ uint32_t input_c = 2048;
+ uint32_t weight_n = 1008;
+
+ util::TensorWrapper input({1,1,1,input_c});
+ util::TensorWrapper weight({weight_n,input_c});
+ util::TensorWrapper bias({weight_n});
+ util::TensorWrapper output({1, weight_n});
+
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+
+ input.initValue([&](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.f;
+ });
+ weight.initValue([&](uint32_t h, uint32_t w) {
+ return (float)h;
+ });
+ bias.initValue([](uint32_t w) {
+ return 0.f;
+ });
+ output.initValue([](uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ bool bret = ACL_CORE_FUNC_NAME(input.ptr<float>(), input.shape(),
+ weight.ptr<float>(), weight.shape(),
+ bias.ptr<float>(), bias.shape(),
+ activation,
+ output.ptr<float>(), output.shape());
+
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1, weight_n});
+ expected.initValue([&](uint32_t h, uint32_t w) {
+ return w*input_c;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
diff --git a/libs/kernel/acl/src/IO_accessor.cpp b/libs/kernel/acl/src/IO_accessor.cpp
new file mode 100644
index 0000000..410fb8e
--- /dev/null
+++ b/libs/kernel/acl/src/IO_accessor.cpp
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "IO_accessor.h"
+
+#include <cassert>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+InputAccessor::InputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape)
+ : _inputData(inputData)
+ , _inputShape(inputShape)
+{
+}
+
+MatrixInputAccessor::MatrixInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape)
+ : _inputData(inputData)
+ , _inputShape(inputShape)
+{
+}
+
+VectorInputAccessor::VectorInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape)
+ : _inputData(inputData)
+ , _inputShape(inputShape)
+{
+}
+
+WeightAccessor::WeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape)
+ : _filterData(filterData)
+ , _filterShape(filterShape)
+{
+}
+
+MatrixWeightAccessor::MatrixWeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape)
+ : _filterData(filterData)
+ , _filterShape(filterShape)
+{
+}
+
+BiasAccessor::BiasAccessor(const float* biasData, const nnfw::rt::Shape& biasShape)
+ : _biasData(biasData)
+ , _biasShape(biasShape)
+{
+}
+
+OutputAccessor::OutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape)
+ : _outputData(outputData)
+ , _outputShape(outputShape)
+{
+}
+
+MatrixOutputAccessor::MatrixOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape)
+ : _outputData(outputData)
+ , _outputShape(outputShape)
+{
+}
+
+VectorOutputAccessor::VectorOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape)
+ : _outputData(outputData)
+ , _outputShape(outputShape)
+{
+}
+
+static uint32_t getOffsetNCHW(const nnfw::rt::Shape& shape, const arm_compute::Coordinates& id)
+{
+ // Compute the offset into the NNAPI (NHWC) data for an ACL (NCHW) coordinate
+ uint32_t num = getSizeOfDimension(shape, 0);
+ uint32_t height = getSizeOfDimension(shape, 1);
+ uint32_t width = getSizeOfDimension(shape, 2);
+ uint32_t chann = getSizeOfDimension(shape, 3);
+ uint32_t stride = 1;
+ uint32_t offset = 0;
+ uint32_t numdim = id.num_dimensions();
+ offset += numdim > 0 ? id[0] * stride : 0; stride *= width;
+ offset += numdim > 1 ? id[1] * stride : 0; stride *= height;
+ offset += numdim > 2 ? id[2] * stride : 0; stride *= chann;
+ offset += numdim > 3 ? id[3] * stride : 0; stride *= num;
+ return offset;
+}
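+// NOTE Within this file, getOffsetNCHW() is only used by BiasAccessor,
+// whose tensors are 1-D, so in practice only id[0] contributes.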
+
+static uint32_t getElementOffset(const nnfw::rt::Shape& shape,
+ uint32_t ch, uint32_t row, uint32_t col)
+{
+ assert(getSizeOfDimension(shape, 0) == 1);
+ assert(shape.dimensions.size() == 4);
+
+ // TODO Optimize this!
+ const uint32_t W = getSizeOfDimension(shape, 2);
+ const uint32_t C = getSizeOfDimension(shape, 3);
+
+ uint32_t offset = 0;
+
+ // NNAPI uses NHWC ordering
+ offset += row * W * C;
+ offset += col * C;
+ offset += ch;
+
+ return offset;
+}
+
+static uint32_t getElementOffset(const nnfw::rt::Shape& shape,
+ uint32_t nth, uint32_t ch, uint32_t row, uint32_t col)
+{
+ assert(shape.dimensions.size() == 4);
+
+ // TODO Optimize this!
+ const uint32_t H = getSizeOfDimension(shape, 1);
+ const uint32_t W = getSizeOfDimension(shape, 2);
+ const uint32_t C = getSizeOfDimension(shape, 3);
+
+ uint32_t offset = 0;
+
+ // NNAPI uses NHWC ordering
+ offset += nth * H * W * C;
+ offset += row * W * C;
+ offset += col * C;
+ offset += ch;
+
+ return offset;
+}
+
+bool InputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t ch = id[2];
+ const uint32_t row = id[1];
+ const uint32_t col = id[0];
+
+ uint32_t offset = getElementOffset(_inputShape, ch, row, col);
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool MatrixInputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() <= 2);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const auto row = id[1];
+ const auto col = id[0];
+ const auto W = tensor.info()->tensor_shape().x();
+
+ const auto offset = row * W + col;
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool VectorInputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() == 1);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t offset = id[0];
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_inputData + offset);
+ });
+ return true;
+}
+
+bool WeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t nth = id[3];
+ const uint32_t ch = id[2];
+ const uint32_t row = id[1];
+ const uint32_t col = id[0];
+
+ uint32_t offset = getElementOffset(_filterShape, nth, ch, row, col);
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_filterData + offset);
+ });
+ return true;
+}
+
+bool MatrixWeightAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() <= 2);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const auto row = id[1];
+ const auto col = id[0];
+ const auto W = tensor.info()->tensor_shape().x();
+
+ uint32_t offset = row * W + col;
+
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_filterData + offset);
+ });
+ return true;
+}
+
+bool BiasAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ uint32_t offset = getOffsetNCHW(_biasShape, id);
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id)) =
+ *(_biasData + offset);
+ });
+ return true;
+}
+
+bool OutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t ch = id[2];
+ const uint32_t row = id[1];
+ const uint32_t col = id[0];
+
+ uint32_t offset = getElementOffset(_outputShape, ch, row, col);
+
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
+
+bool VectorOutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() == 1);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const uint32_t x = id[0];
+
+ uint32_t offset = x;
+
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
+
+bool MatrixOutputAccessor::access_tensor(arm_compute::ITensor &tensor)
+{
+ arm_compute::Window window;
+ window.use_tensor_dimensions(tensor.info()->tensor_shape());
+
+ assert(tensor.info()->tensor_shape().num_dimensions() <= 2);
+
+ execute_window_loop(window, [&](const arm_compute::Coordinates& id)
+ {
+ const auto row = id[1];
+ const auto col = id[0];
+ const auto W = tensor.info()->tensor_shape().x();
+
+ const auto offset = row * W + col;
+
+ *(_outputData + offset) =
+ *reinterpret_cast<float *>(tensor.ptr_to_element(id));
+ });
+ return false; // end the network
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/IO_accessor.h b/libs/kernel/acl/src/IO_accessor.h
new file mode 100644
index 0000000..e7670f1
--- /dev/null
+++ b/libs/kernel/acl/src/IO_accessor.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+#define __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
+
+#include <arm_compute/graph/ITensorAccessor.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/NEON/NEFunctions.h>
+
+#include <OperationsUtils.h> // for nnfw::rt::Shape
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+class InputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ InputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
+ InputAccessor(InputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const nnfw::rt::Shape& _inputShape;
+};
+
+class MatrixInputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ MatrixInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
+ MatrixInputAccessor(MatrixInputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const nnfw::rt::Shape& _inputShape;
+};
+
+class VectorInputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ VectorInputAccessor(const float* inputData, const nnfw::rt::Shape& inputShape);
+ VectorInputAccessor(VectorInputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _inputData;
+ const nnfw::rt::Shape& _inputShape;
+};
+
+class WeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ WeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape);
+ WeightAccessor(WeightAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _filterData;
+ const nnfw::rt::Shape& _filterShape;
+};
+
+class MatrixWeightAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ MatrixWeightAccessor(const float* filterData, const nnfw::rt::Shape& filterShape);
+ MatrixWeightAccessor(MatrixWeightAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _filterData;
+ const nnfw::rt::Shape& _filterShape;
+};
+
+class BiasAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ BiasAccessor(const float* biasData, const nnfw::rt::Shape& biasShape);
+ BiasAccessor(BiasAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ const float* _biasData;
+ const nnfw::rt::Shape& _biasShape;
+};
+
+class OutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ OutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
+ OutputAccessor(OutputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const nnfw::rt::Shape& _outputShape;
+};
+
+class MatrixOutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ MatrixOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
+ MatrixOutputAccessor(MatrixOutputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const nnfw::rt::Shape& _outputShape;
+};
+
+class VectorOutputAccessor : public arm_compute::graph::ITensorAccessor
+{
+public:
+ VectorOutputAccessor(float* outputData, const nnfw::rt::Shape& outputShape);
+ VectorOutputAccessor(VectorOutputAccessor&&) = default;
+
+ // Inherited methods overridden:
+ bool access_tensor(arm_compute::ITensor& tensor) override;
+
+private:
+ float* _outputData;
+ const nnfw::rt::Shape& _outputShape;
+};
+
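+// The CLTensor overloads below map()/unmap() the OpenCL buffer so the host
+// can touch it; the plain Tensor (NEON) overloads read host memory directly
+// and need no mapping.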
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::CLTensor& tensor, const float* data,
+ const nnfw::rt::Shape& shape)
+{
+ tensor.map();
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+ tensor.unmap();
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::CLTensor& tensor, float* data,
+ const nnfw::rt::Shape& shape)
+{
+ tensor.map();
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+ tensor.unmap();
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::Tensor& tensor, const float* data,
+ const nnfw::rt::Shape& shape)
+{
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+}
+
+template<typename AccessorType>
+inline void TensorAccess(arm_compute::Tensor& tensor, float* data,
+ const nnfw::rt::Shape& shape)
+{
+ AccessorType accessor(data, shape);
+ accessor.access_tensor(tensor);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_IO_ACCESSOR_H__
diff --git a/libs/kernel/acl/src/Init_acl.cpp b/libs/kernel/acl/src/Init_acl.cpp
new file mode 100644
index 0000000..cabf079
--- /dev/null
+++ b/libs/kernel/acl/src/Init_acl.cpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+// Performs one-time initialization; calling it multiple times is safe
+void Initialize(void)
+{
+ arm_compute::CLScheduler::get().default_init();
+}
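+
+// Illustrative usage (assumed, not part of this file):
+//   nnfw::kernel::acl::Initialize();   // once, at startup
+//   // ... then call ACL-backed kernels such as convFloat32() ...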
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/NEUniqueTensor.h b/libs/kernel/acl/src/NEUniqueTensor.h
new file mode 100644
index 0000000..34412f9
--- /dev/null
+++ b/libs/kernel/acl/src/NEUniqueTensor.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__
+#define __NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__
+
+#include <arm_compute/runtime/Tensor.h>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+// TODO: find a way to merge CLUniqueTensor and NEUniqueTensor.
+class NEUniqueTensor
+{
+public:
+ NEUniqueTensor(const ::arm_compute::TensorInfo &info)
+ {
+ _tensor.allocator()->init(info);
+ }
+
+public:
+ // Neither copy nor move is allowed
+ NEUniqueTensor(const NEUniqueTensor &) = delete;
+ NEUniqueTensor(NEUniqueTensor &&) = delete;
+
+public:
+ ~NEUniqueTensor()
+ {
+ _tensor.allocator()->free();
+ }
+
+public:
+ void allocate()
+ {
+ _tensor.allocator()->allocate();
+ }
+
+public:
+ ::arm_compute::Tensor &ref(void) { return _tensor; }
+ ::arm_compute::Tensor *ptr(void) { return &_tensor; }
+
+private:
+ ::arm_compute::Tensor _tensor;
+};
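+
+// Minimal usage sketch (illustrative):
+//   NEUniqueTensor t(arm_compute::TensorInfo(shape, arm_compute::Format::F32));
+//   layer.configure(t.ptr(), ...);  // configure first,
+//   t.allocate();                   // then allocate backing memory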
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif //__NNFW_KERNEL_ACL_NEUNIQUETENSOR_H__
diff --git a/libs/kernel/acl/src/Reshape.h b/libs/kernel/acl/src/Reshape.h
new file mode 100644
index 0000000..ebd8247
--- /dev/null
+++ b/libs/kernel/acl/src/Reshape.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__
+#define __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+// TODO: fix include path in CMakeFiles
+#include "IO_accessor.h"
+#include "shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace common {
+
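+// Abstracts backend synchronization: the CL backend passes a function that
+// waits on the CLScheduler (see cl/Reshape.cpp), while a CPU backend can
+// presumably pass a no-op.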
+typedef std::function<void (void)> sync_scheduler_f;
+
+template<class TensorT, class LayerT>
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+ void* outputData, const nnfw::rt::Shape& outputShape,
+ sync_scheduler_f sync_scheduler) {
+
+ auto input_shape = util::fromNNShape(inputShape);
+ auto output_shape = util::fromNNShape(outputShape);
+
+ TensorT input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ TensorT output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ LayerT l;
+
+ l.configure(input.ptr(), output.ptr());
+
+ input.allocate();
+ output.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), static_cast<const float*>(inputData), inputShape);
+
+ l.run();
+
+ sync_scheduler();
+
+ TensorAccess<OutputAccessor>(output.ref(), static_cast<float*>(outputData), outputShape);
+
+ return true;
+}
+
+} // namespace common
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
+#endif // __NNFW_KERNEL_ACL_RESHAPE_COMMON_H__
diff --git a/libs/kernel/acl/src/Reshape.test.h b/libs/kernel/acl/src/Reshape.test.h
new file mode 100644
index 0000000..a96a896
--- /dev/null
+++ b/libs/kernel/acl/src/Reshape.test.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Reshape.h>
+
+// TODO: fix include path in CMakeFiles
+#include "util.h"
+
+#ifndef ACL_TEST
+#error "ACL_TEST should be defined first!"
+#endif // ACL_TEST
+
+#ifndef ACL_CORE_FUNC_NAME
+#error "ACL_CORE_FUNC_NAME should be defined first!"
+#endif // ACL_CORE_FUNC_NAME
+
+using namespace nnfw::kernel::acl;
+
+ACL_TEST(KernelACL_TC, reshape_1) {
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,9,1}, 1.0, 0 };
+ float inputData[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
+
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float outputData[9] = {0};
+
+ bool bret = ACL_CORE_FUNC_NAME(inputData, inputShape,
+ outputData, outputShape);
+
+ EXPECT_EQ(bret, true);
+
+ float expectData[9] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/cl/Concatenation.cpp b/libs/kernel/acl/src/cl/Concatenation.cpp
new file mode 100644
index 0000000..9376006
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Concatenation.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+ const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ if (axis != 3)
+ {
+ assert("Only support axis=3 for ACL" && 0);
+ return false;
+ }
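+ // NNAPI axis=3 selects the channel axis of an NHWC tensor; it maps onto
+ // ACL's depth concatenation below, hence the restriction.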
+ assert(inputDataPtrs.size() == inputShapes.size());
+
+ std::vector<arm_compute::CLTensor*> inputPtrs;
+ std::vector<arm_compute::ICLTensor*> inputIptrs;
+ arm_compute::CLTensor output;
+
+ // init Tensors
+ std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin();
+ for (auto inputData : inputDataPtrs)
+ {
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::CLTensor* inputPtr = new arm_compute::CLTensor();
+
+ inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ inputPtrs.push_back(inputPtr);
+ inputIptrs.push_back(inputPtr);
+
+ it_inputShape++;
+ }
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ // prepare ACL Concatenate and configure tensors
+ auto concat = std::make_shared<arm_compute::CLDepthConcatenateLayer>();
+ concat->configure(inputIptrs, &output);
+
+ // allocate Tensors
+ it_inputShape = inputShapes.begin();
+ std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin();
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->allocate();
+
+ const float* inputData = *it_inputData;
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+
+ TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape);
+
+ it_inputShape++;
+ it_inputData++;
+ }
+ output.allocator()->allocate();
+
+ // run
+ concat->run();
+ arm_compute::CLScheduler::get().sync();
+
+ // get output
+ TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+ // cleanup
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->free();
+ delete inputPtr;
+ }
+ output.allocator()->free();
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Concatenation.test.cpp b/libs/kernel/acl/src/cl/Concatenation.test.cpp
new file mode 100644
index 0000000..b2c5a58
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Concatenation.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Concatenation.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, concatFloat32_1)
+{
+ float inputData_1[6] = {
+ 1, 2, 3, 4, 5, 6 // [ [ [1],[2],[3] ], [ [4],[5],[6] ] ]
+ };
+ float inputData_2[6] = {
+ 7, 8, 9, 10, 11, 12 // [ [ [7],[8],[9] ], [ [10],[11],[12] ] ]
+ };
+ const nnfw::rt::Shape inputShape_1 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ const nnfw::rt::Shape inputShape_2 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ std::vector<const float*> inputDataPtrs;
+ std::vector<nnfw::rt::Shape> inputShapes;
+ float outputData[12];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,3,2}, 1.0, 0 };
+ bool bret;
+
+ inputDataPtrs.push_back(inputData_1);
+ inputDataPtrs.push_back(inputData_2);
+ inputShapes.push_back(inputShape_1);
+ inputShapes.push_back(inputShape_2);
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = concatenationFloat32(inputDataPtrs, inputShapes, 3,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12
+ };
+ float expectData[12]; // [ [ [1,7],[2,8],[3,9] ], [ [4,10],[5,11],[6,12] ] ]
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/cl/Conv2D.cpp b/libs/kernel/acl/src/cl/Conv2D.cpp
new file mode 100644
index 0000000..4783bdc
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Conv2D.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <util/environment.h>
+
+#include "../IO_accessor.h"
+#include "../util.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../support.h"
+
+#include "util/feature/TextFormatter.h"
+
+#include "support/nnapi/feature/Reader.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static int verbose = 0;
+
+bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape);
+ arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+ CLUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+ CLUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ auto conv_f = std::make_shared<arm_compute::CLConvolutionLayer>();
+
+ conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info);
+
+ fns.emplace_back(conv_f);
+
+ util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ filter.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape);
+
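+ // Setting the CONV2D_VERBOSE environment variable to a non-zero value dumps
+ // the input feature map in both NNAPI and ACL layouts for debugging.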
+ nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose);
+ if (verbose)
+ {
+ input.ref().map();
+ auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+ nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+ nnfw::support::acl::feature::Reader<float> acl_ifm_reader{input.ptr()};
+
+ std::cout << "NNAPI IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+ std::cout << "ARM Compute IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+ input.ref().unmap();
+ }
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Conv2D.test.cpp b/libs/kernel/acl/src/cl/Conv2D.test.cpp
new file mode 100644
index 0000000..e34cdee
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Conv2D.test.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Conv2D.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, convFloat32_3x3to1x1)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
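+ // All-ones 3x3 input and filter: 9 products of 1, plus bias 1.0 -> 10.0.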
+ float expectData[] = { 10.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x3to3x3)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
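+ // With 1-pixel zero padding each output sums the overlapping ones plus bias
+ // 1.0: corners cover 4 inputs (5.0), edges 6 (7.0), the center 9 (10.0).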
+ float expectData[] = {
+ 5.0f, 7.0f, 5.0f,
+ 7.0f, 10.0f, 7.0f,
+ 5.0f, 7.0f, 5.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x3to3x3_RELU)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { -5.0f };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
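+ // Same sums as above with bias -5.0: corners 4-5=-1, edges 6-5=1, center
+ // 9-5=4; RELU then clamps the negatives to zero.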
+ float expectData[] =
+ {
+ 0.0f, 1.0f, 0.0f,
+ 1.0f, 4.0f, 1.0f,
+ 0.0f, 1.0f, 0.0f
+ };
+
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, convFloat32_3x5to3x3)
+{
+ float inputData[15] = {
+ 1,2,3,4,5,
+ 6,7,8,9,10,
+ 11,12,13,14,15
+ };
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 };
+ float filterData[18] = {
+ 1,1,1, 1,1,1, 1,1,1,
+ 2,2,2, 2,2,2, 2,2,2
+ };
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 };
+ float biasData[2] = { 1.0, 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[30];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 };
+ bool bret;
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 17.0f, 28.0f, 34.0f, 40.0f, 29.0f,
+ 40.0f, 64.0f, 73.0f, 82.0f, 58.0f,
+ 37.0f, 58.0f, 64.0f, 70.0f, 49.0f,
+
+ 33.0f, 55.0f, 67.0f, 79.0f, 57.0f,
+ 79.0f, 127.0f, 145.0f, 163.0f, 115.0f,
+ 73.0f, 115.0f, 127.0f, 139.0f, 97.0f
+ };
+ float expectData[30];
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp b/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp
new file mode 100644
index 0000000..7593a99
--- /dev/null
+++ b/libs/kernel/acl/src/cl/DepthwiseConv2D.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../DepthwiseConv2D.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static void sync_scheduler() {
+ arm_compute::CLScheduler::get().sync();
+}
+
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::depthwiseConvFloat32<CLUniqueTensor, arm_compute::CLDepthwiseConvolutionLayer,
+ arm_compute::CLActivationLayer>(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
diff --git a/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp b/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp
new file mode 100644
index 0000000..6955633
--- /dev/null
+++ b/libs/kernel/acl/src/cl/DepthwiseConv2D.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME depthwiseConvFloat32
+#define ACL_TEST(tc, t) TEST(tc, cl_##t)
+
+#include "../DepthwiseConv2D.test.h"
diff --git a/libs/kernel/acl/src/cl/FullyConnected.cpp b/libs/kernel/acl/src/cl/FullyConnected.cpp
new file mode 100644
index 0000000..7513355
--- /dev/null
+++ b/libs/kernel/acl/src/cl/FullyConnected.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../FullyConnected.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+static void sync_scheduler() {
+ arm_compute::CLScheduler::get().sync();
+}
+
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::fullyConnectedFloat32<CLUniqueTensor, arm_compute::CLFullyConnectedLayer,
+ arm_compute::CLActivationLayer>(inputData, inputShape,
+ weightsData, weightsShape,
+ biasData, biasShape,
+ activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/FullyConnected.test.cpp b/libs/kernel/acl/src/cl/FullyConnected.test.cpp
new file mode 100644
index 0000000..b1f5a09
--- /dev/null
+++ b/libs/kernel/acl/src/cl/FullyConnected.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME fullyConnectedFloat32
+#define ACL_TEST(tc, t) TEST(tc, cl_##t)
+
+#include "../FullyConnected.test.h"
diff --git a/libs/kernel/acl/src/cl/Pooling.cpp b/libs/kernel/acl/src/cl/Pooling.cpp
new file mode 100644
index 0000000..e22eacc
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Pooling.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+
+#include <cassert>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, false);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), maxpool_info);
+
+ fns.emplace_back(pool_f);
+
+ input.allocate();
+ output.allocate();
+
+ util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns);
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, true);
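+ // The trailing 'true' should exclude the zero padding from the averaging
+ // denominator (ACL's exclude_padding flag).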
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::CLPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), pool_info);
+
+ fns.emplace_back(pool_f);
+
+ input.allocate();
+ output.allocate();
+
+ util::insertFusedActivationLayer<CLUniqueTensor, arm_compute::CLActivationLayer>(output, activation, fns);
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Pooling.test.cpp b/libs/kernel/acl/src/cl/Pooling.test.cpp
new file mode 100644
index 0000000..8112e7a
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Pooling.test.cpp
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Pooling.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, maxPoolFloat32_3x3to1x1)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
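+ // The input holds 1..9, so a full 3x3 max pool yields 9.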
+ float expectData[] = { 9.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_3x3to1x1_RELU)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = -1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value--;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+ bret = maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
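+ // The input holds -1..-9; the maximum is -1 and RELU clamps it to 0.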
+ float expectData[] = { 0.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_3x3to2x2)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 1;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 2;
+ int32_t filter_height = 2;
+
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 5.0f, 6.0f,
+ 8.0f, 9.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_147x147to73x73)
+{
+ util::TensorWrapper input({1,147,147,64});
+ util::TensorWrapper output({1,73,73,64});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = maxPoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,73,73,64});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, maxPoolFloat32_71x71to35x35)
+{
+ util::TensorWrapper input({1,71,71,192});
+ util::TensorWrapper output({1,35,35,192});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = maxPoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,35,35,192});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to1x1)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
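+ // The mean of 1..9 is 5.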
+ float expectData[] = { 5.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to1x1_RELU)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 3.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value--;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+ bret = averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to2x2)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 2;
+ int32_t filter_height = 2;
+
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
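+ // 2x2 window means: {1,2,4,5}->3, {2,3,5,6}->4, {4,5,7,8}->6, {5,6,8,9}->7.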
+ float expectData[] = {
+ 3.0f, 4.0f,
+ 6.0f, 7.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_3x3to3x3)
+{
+ std::vector<uint32_t> dims = {1,3,3,1};
+ util::TensorWrapper input(dims);
+ util::TensorWrapper output(dims);
+
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
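+ // With padding excluded from the denominator, the nine window averages over
+ // the inputs 1..9 run from 3.0 to 7.0 in steps of 0.5.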
+ util::TensorWrapper expected(dims);
+ float v=2.5f;
+ expected.initValue([&v](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ v = v + 0.5f;
+ return v;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_35x35to35x35)
+{
+ std::vector<uint32_t> dims = {1,35,35,768};
+ util::TensorWrapper input(dims);
+ util::TensorWrapper output(dims);
+
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected(dims);
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, averagePoolFloat32_8x8to1x1)
+{
+ util::TensorWrapper input({1,8,8,2048});
+ util::TensorWrapper output({1,1,1,2048});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 8;
+ int32_t filter_height = 8;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = averagePoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,1,1,2048});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
diff --git a/libs/kernel/acl/src/cl/Reshape.cpp b/libs/kernel/acl/src/cl/Reshape.cpp
new file mode 100644
index 0000000..e420ab9
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Reshape.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../Reshape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
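+// Passed to the shared reshape implementation so it can block on the CL command
+// queue before output data is read back on the host.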
+static void sync_scheduler() {
+ arm_compute::CLScheduler::get().sync();
+}
+
+bool reshapeGeneric(const void* inputData, const nnfw::rt::Shape& inputShape,
+ void* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::reshapeGeneric<CLUniqueTensor, arm_compute::CLReshapeLayer>
+ (inputData, inputShape, outputData, outputShape, sync_scheduler);
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Reshape.test.cpp b/libs/kernel/acl/src/cl/Reshape.test.cpp
new file mode 100644
index 0000000..db23a6d
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Reshape.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
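+// The backend-agnostic test cases live in ../Reshape.test.h; these macros bind
+// them to the CL entry point and prefix the generated test names with "cl_".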
+#define ACL_CORE_FUNC_NAME reshapeGeneric
+#define ACL_TEST(tc, t) TEST(tc, cl_##t)
+
+#include "../Reshape.test.h"
diff --git a/libs/kernel/acl/src/cl/Softmax.cpp b/libs/kernel/acl/src/cl/Softmax.cpp
new file mode 100644
index 0000000..a628f05
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Softmax.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h>
+
+#include <cassert>
+#include <memory>
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../CLUniqueTensor.h"
+#include "../util.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+bool softmaxFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float beta,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ CLUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ CLUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto softmax_f = std::make_shared<arm_compute::CLSoftmaxLayer>();
+ softmax_f->configure(input.ptr(), output.ptr(), beta);
+
+ input.allocate();
+ output.allocate();
+
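+ // The accessors are rank-specific: 4D tensors are copied as feature maps,
+ // 2D tensors as matrices.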
+ if (inputShape.dimensions.size() == 4)
+ {
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ softmax_f->run();
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+ }
+ else if (inputShape.dimensions.size() == 2)
+ {
+ TensorAccess<MatrixInputAccessor>(input.ref(), inputData, inputShape);
+
+ softmax_f->run();
+
+ arm_compute::CLScheduler::get().sync();
+
+ TensorAccess<MatrixOutputAccessor>(output.ref(), outputData, outputShape);
+ }
+ else
+ {
+ assert("undefined dimension of input" && 0);
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/cl/Softmax.test.cpp b/libs/kernel/acl/src/cl/Softmax.test.cpp
new file mode 100644
index 0000000..8ee8b41
--- /dev/null
+++ b/libs/kernel/acl/src/cl/Softmax.test.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Softmax.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, softmaxFloat32_1xn)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, softmaxFloat32_4d)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.25f, 0.25f, 0.25f, 0.25f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, softmaxFloat32_1xn_seq)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,4}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
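+ // Reference values: softmax([1, 2, 3, 4]) with beta = 1.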
+ float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972};
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, softmaxFloat32_4d_seq)
+{
+ float inputData[4];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,4,1}, 1.0, 0 };
+ const float beta = 1.0f;
+ bool bret;
+
+ util::initData_Increasing(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = softmaxFloat32(inputData, inputShape, beta, outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {0.032058603280085, 0.0871443187420326, 0.23688281808991, 0.643914259887972};
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/gtest_env.cpp b/libs/kernel/acl/src/gtest_env.cpp
new file mode 100644
index 0000000..f6fc52f
--- /dev/null
+++ b/libs/kernel/acl/src/gtest_env.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+
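+// Global gtest environment: initializes the ACL runtime once before any kernel
+// test in this binary runs.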
+class TestEnvironment : public ::testing::Environment
+{
+public:
+ virtual ~TestEnvironment() = default;
+
+ virtual void SetUp()
+ {
+ nnfw::kernel::acl::Initialize();
+ }
+
+ virtual void TearDown()
+ {
+ // DO NOTHING
+ }
+};
+
+static ::testing::Environment* const testingenv =
+ ::testing::AddGlobalTestEnvironment(new TestEnvironment);
diff --git a/libs/kernel/acl/src/neon/Concatenation.cpp b/libs/kernel/acl/src/neon/Concatenation.cpp
new file mode 100644
index 0000000..8738a9d
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Concatenation.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/Tensor.h>
+#include <arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h>
+
+#include <cassert>
+#include <memory>
+#include <vector>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+bool concatenationFloat32(const std::vector<const float*>& inputDataPtrs,
+ const std::vector<nnfw::rt::Shape>& inputShapes, int32_t axis,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
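+ // NEDepthConcatenateLayer joins tensors along the depth (channel) axis, which
+ // is axis 3 in the NHWC layout used by NNAPI float tensors.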
+ if (axis != 3)
+ {
+ assert("Only support axis=3 for ACL" && 0);
+ return false;
+ }
+ assert(inputDataPtrs.size() == inputShapes.size());
+
+ std::vector<arm_compute::Tensor*> inputPtrs;
+ std::vector<arm_compute::ITensor*> inputIptrs;
+ arm_compute::Tensor output;
+
+ // init Tensors
+ std::vector<nnfw::rt::Shape>::const_iterator it_inputShape = inputShapes.begin();
+ for (auto inputData : inputDataPtrs)
+ {
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::Tensor* inputPtr = new arm_compute::Tensor();
+
+ inputPtr->allocator()->init(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ inputPtrs.push_back(inputPtr);
+ inputIptrs.push_back(inputPtr);
+
+ it_inputShape++;
+ }
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ output.allocator()->init(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ // prepare ACL Concatenate and configure tensors
+ auto concat = std::make_shared<arm_compute::NEDepthConcatenateLayer>();
+ concat->configure(inputIptrs, &output);
+
+ // allocate Tensors
+ it_inputShape = inputShapes.begin();
+ std::vector<const float*>::const_iterator it_inputData = inputDataPtrs.begin();
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->allocate();
+
+ const float* inputData = *it_inputData;
+ const nnfw::rt::Shape& inputShape = *it_inputShape;
+
+ TensorAccess<InputAccessor>(*inputPtr, inputData, inputShape);
+
+ it_inputShape++;
+ it_inputData++;
+ }
+ output.allocator()->allocate();
+
+ // run
+ concat->run();
+
+ // get output
+ TensorAccess<OutputAccessor>(output, outputData, outputShape);
+
+ // cleanup
+ for (auto inputPtr : inputPtrs)
+ {
+ inputPtr->allocator()->free();
+ delete inputPtr;
+ }
+ output.allocator()->free();
+
+ return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Concatenation.test.cpp b/libs/kernel/acl/src/neon/Concatenation.test.cpp
new file mode 100644
index 0000000..03b05bd
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Concatenation.test.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Concatenation.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, neon_concatFloat32_1)
+{
+ float inputData_1[6] = {
+ 1, 2, 3, 4, 5, 6 // [ [ [1],[2],[3] ], [ [4],[5],[6] ] ]
+ };
+ float inputData_2[6] = {
+ 7, 8, 9, 10, 11, 12 // [ [ [7],[8],[9] ], [ [10],[11],[12] ] ]
+ };
+ const nnfw::rt::Shape inputShape_1 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ const nnfw::rt::Shape inputShape_2 = { OperandType::FLOAT32, {1,2,3,1}, 1.0, 0 };
+ std::vector<const float*> inputDataPtrs;
+ std::vector<nnfw::rt::Shape> inputShapes;
+ float outputData[12];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,3,2}, 1.0, 0 };
+ bool bret;
+
+ inputDataPtrs.push_back(inputData_1);
+ inputDataPtrs.push_back(inputData_2);
+ inputShapes.push_back(inputShape_1);
+ inputShapes.push_back(inputShape_2);
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::concatenationFloat32(inputDataPtrs, inputShapes, 3,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12
+ };
+ float expectData[12]; // [ [ [1,7],[2,8],[3,9] ], [ [4,10],[5,11],[6,12] ] ]
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/neon/Conv2D.cpp b/libs/kernel/acl/src/neon/Conv2D.cpp
new file mode 100644
index 0000000..679ecfc
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Conv2D.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include <util/environment.h>
+
+#include "../IO_accessor.h"
+#include "../util.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../support.h"
+
+#include "util/feature/TextFormatter.h"
+
+#include "support/nnapi/feature/Reader.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+static int verbose = 0;
+
+bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape filter_shape = util::fromNNShape(filterShape);
+ arm_compute::TensorShape bias_shape = util::fromVectorNNShape(biasShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+ arm_compute::PadStrideInfo conv_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+ NEUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
+ NEUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ auto conv_f = std::make_shared<arm_compute::NEConvolutionLayer>();
+
+ conv_f->configure(input.ptr(), filter.ptr(), bias.ptr(), output.ptr(), conv_info);
+
+ fns.emplace_back(conv_f);
+
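+ // If a fused activation is requested, this appends an NEActivationLayer stage
+ // to 'fns' so it runs on the output right after the convolution.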
+ util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+ bias.allocate();
+ filter.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+ TensorAccess<BiasAccessor>(bias.ref(), biasData, biasShape);
+ TensorAccess<WeightAccessor>(filter.ref(), filterData, filterShape);
+
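+ // Set the CONV2D_VERBOSE environment variable to a non-zero value to dump the
+ // input feature map as seen by both the NNAPI and ARM Compute runtimes.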
+ nnfw::util::env::IntAccessor("CONV2D_VERBOSE").access(verbose);
+ if (verbose)
+ {
+ auto ifm_shape = nnfw::support::nnapi::feature::asFeatureShape(inputShape);
+ nnfw::support::nnapi::feature::Reader<float> nnapi_ifm_reader{ifm_shape, inputData};
+ nnfw::support::acl::feature::Reader<float> acl_ifm_reader{ input.ptr() };
+
+ std::cout << "NNAPI IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, nnapi_ifm_reader} << std::endl;
+
+ std::cout << "ARM Compute IFM:" << std::endl;
+ std::cout << nnfw::util::feature::TextFormatter<float>{ifm_shape, acl_ifm_reader} << std::endl;
+ }
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Conv2D.test.cpp b/libs/kernel/acl/src/neon/Conv2D.test.cpp
new file mode 100644
index 0000000..6a3de1c
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Conv2D.test.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <kernel/acl/Conv2D.h>
+
+// TODO: fix include path in CMakeFiles
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to1x1)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 10.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 5.0f, 7.0f, 5.0f,
+ 7.0f, 10.0f, 7.0f,
+ 5.0f, 7.0f, 5.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x3to3x3_RELU)
+{
+ float inputData[9];
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float filterData[9];
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ float biasData[1] = { -5.0f };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {1}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[9];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ bool bret;
+
+ util::initData(inputData, sizeof(inputData) / sizeof(inputData[0]), 1.0);
+ util::initData(filterData, sizeof(filterData) / sizeof(filterData[0]), 1.0);
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] =
+ {
+ 0.0f, 1.0f, 0.0f,
+ 1.0f, 4.0f, 1.0f,
+ 0.0f, 1.0f, 0.0f
+ };
+
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_convFloat32_3x5to3x3)
+{
+ float inputData[15] = {
+ 1,2,3,4,5,
+ 6,7,8,9,10,
+ 11,12,13,14,15
+ };
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,5,1}, 1.0, 0 };
+ float filterData[18] = {
+ 1,1,1, 1,1,1, 1,1,1,
+ 2,2,2, 2,2,2, 2,2,2
+ };
+ const nnfw::rt::Shape filterShape = { OperandType::FLOAT32, {2,3,3,1}, 1.0, 0 };
+ float biasData[2] = { 1.0, 1.0 };
+ const nnfw::rt::Shape biasShape = { OperandType::FLOAT32, {2}, 1.0, 0 };
+ int32_t padding_left = 1;
+ int32_t padding_right = 1;
+ int32_t padding_top = 1;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t activation = static_cast<int32_t>(FusedActivationFunc::RELU);
+ float outputData[30];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,3,5,2}, 1.0, 0 };
+ bool bret;
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ bret = neon::convFloat32(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectNCHW[] = {
+ 17.0f, 28.0f, 34.0f, 40.0f, 29.0f,
+ 40.0f, 64.0f, 73.0f, 82.0f, 58.0f,
+ 37.0f, 58.0f, 64.0f, 70.0f, 49.0f,
+
+ 33.0f, 55.0f, 67.0f, 79.0f, 57.0f,
+ 79.0f, 127.0f, 145.0f, 163.0f, 115.0f,
+ 73.0f, 115.0f, 127.0f, 139.0f, 97.0f
+ };
+ float expectData[30];
+ util::NCHW2NHWC(expectNCHW, expectData, outputShape);
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
diff --git a/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp b/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp
new file mode 100644
index 0000000..bcf56c6
--- /dev/null
+++ b/libs/kernel/acl/src/neon/DepthwiseConv2D.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/NEScheduler.h>
+#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../DepthwiseConv2D.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace neon {
+// NEON functions run synchronously on the host, so there is nothing to flush here.
+static void sync_scheduler() {
+}
+
+bool depthwiseConvFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* filterData, const nnfw::rt::Shape& filterShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t depth_multiplier, int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+ return common::depthwiseConvFloat32<NEUniqueTensor, arm_compute::NEDepthwiseConvolutionLayer,
+ arm_compute::NEActivationLayer>(inputData, inputShape,
+ filterData, filterShape,
+ biasData, biasShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ depth_multiplier, activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp b/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp
new file mode 100644
index 0000000..d729d53
--- /dev/null
+++ b/libs/kernel/acl/src/neon/DepthwiseConv2D.test.cpp
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME neon::depthwiseConvFloat32
+#define ACL_TEST(tc, t) TEST(tc, neon_##t)
+
+#include "../DepthwiseConv2D.test.h"
diff --git a/libs/kernel/acl/src/neon/FullyConnected.cpp b/libs/kernel/acl/src/neon/FullyConnected.cpp
new file mode 100644
index 0000000..86229cb
--- /dev/null
+++ b/libs/kernel/acl/src/neon/FullyConnected.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/NEScheduler.h>
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+
+#include <cassert>
+
+// TODO: fix include path in CMakeFiles
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+#include "../FullyConnected.h"
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+
+namespace neon {
+
+// NEON functions run synchronously on the host, so the common path needs no extra sync.
+static void sync_scheduler() {
+}
+
+bool fullyConnectedFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ const float* weightsData, const nnfw::rt::Shape& weightsShape,
+ const float* biasData, const nnfw::rt::Shape& biasShape,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape) {
+
+ return common::fullyConnectedFloat32<NEUniqueTensor, arm_compute::NEFullyConnectedLayer,
+ arm_compute::NEActivationLayer>(inputData, inputShape,
+ weightsData, weightsShape,
+ biasData, biasShape,
+ activation,
+ outputData, outputShape,
+ sync_scheduler);
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
+
diff --git a/libs/kernel/acl/src/neon/FullyConnected.test.cpp b/libs/kernel/acl/src/neon/FullyConnected.test.cpp
new file mode 100644
index 0000000..d4c95e4
--- /dev/null
+++ b/libs/kernel/acl/src/neon/FullyConnected.test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ACL_CORE_FUNC_NAME neon::fullyConnectedFloat32
+#define ACL_TEST(tc, t) TEST(tc, neon_##t)
+
+#include "../FullyConnected.test.h"
+
diff --git a/libs/kernel/acl/src/neon/Pooling.cpp b/libs/kernel/acl/src/neon/Pooling.cpp
new file mode 100644
index 0000000..5c58ae0
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Pooling.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <OperationsUtils.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+
+#include <memory>
+#include <vector>
+
+#include "../IO_accessor.h"
+#include "../shape.h"
+#include "../NEUniqueTensor.h"
+
+#include <cassert>
+
+namespace nnfw {
+namespace kernel {
+namespace acl {
+namespace neon {
+
+bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
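+ // The trailing PoolingLayerInfo flag is exclude_padding; it only affects
+ // averaging, so false is fine for MAX pooling.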
+ arm_compute::PoolingLayerInfo maxpool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::MAX,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, false);
+
+ NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::NEPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), maxpool_info);
+
+ fns.emplace_back(pool_f);
+
+ util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
+ int32_t padding_left, int32_t padding_right,
+ int32_t padding_top, int32_t padding_bottom,
+ int32_t stride_width, int32_t stride_height,
+ int32_t filter_width, int32_t filter_height,
+ int32_t activation,
+ float* outputData, const nnfw::rt::Shape& outputShape)
+{
+ arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
+ arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
+
+ std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
+
+ arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
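+ // exclude_padding=true: padded elements are left out of the averaging
+ // denominator, matching the behavior the tests above expect.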
+ arm_compute::PoolingLayerInfo pool_info = arm_compute::PoolingLayerInfo(arm_compute::PoolingType::AVG,
+ arm_compute::Size2D(filter_width,filter_height),
+ pad_info, true);
+
+ NEUniqueTensor input(arm_compute::TensorInfo(input_shape, arm_compute::Format::F32));
+ NEUniqueTensor output(arm_compute::TensorInfo(output_shape, arm_compute::Format::F32));
+
+ auto pool_f = std::make_shared<arm_compute::NEPoolingLayer>();
+ pool_f->configure(input.ptr(), output.ptr(), pool_info);
+
+ fns.emplace_back(pool_f);
+
+ util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
+
+ input.allocate();
+ output.allocate();
+
+ TensorAccess<InputAccessor>(input.ref(), inputData, inputShape);
+
+ for (const auto &fn : fns)
+ {
+ fn->run();
+ }
+
+ TensorAccess<OutputAccessor>(output.ref(), outputData, outputShape);
+
+ return true;
+}
+
+} // namespace neon
+} // namespace acl
+} // namespace kernel
+} // namespace nnfw
diff --git a/libs/kernel/acl/src/neon/Pooling.test.cpp b/libs/kernel/acl/src/neon/Pooling.test.cpp
new file mode 100644
index 0000000..4e65939
--- /dev/null
+++ b/libs/kernel/acl/src/neon/Pooling.test.cpp
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <OperationsUtils.h>
+#include <kernel/acl/nnfw_kernel_acl.h>
+#include <arm_compute/core/Types.h>
+#include <kernel/acl/Pooling.h>
+
+#include "../util.h"
+
+using namespace nnfw::kernel::acl;
+
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to1x1)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 9.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to1x1_RELU)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = -1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value--;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_RELU;
+
+ bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = { 0.0f };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_maxPoolFloat32_3x3to2x2)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 1;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 1;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 2;
+ int32_t filter_height = 2;
+
+ float outputData[4];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,2,2,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = neon::maxPoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+
+ float expectData[] = {
+ 5.0f, 6.0f,
+ 8.0f, 9.0f
+ };
+ bret = util::compareData(outputData, expectData, outputShape);
+ EXPECT_EQ(bret, true);
+}
+
+TEST(KernelACL_TC, neon_maxPoolFloat32_147x147to73x73)
+{
+ util::TensorWrapper input({1,147,147,64});
+ util::TensorWrapper output({1,73,73,64});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = neon::maxPoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,73,73,64});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, neon_maxPoolFloat32_71x71to35x35)
+{
+ util::TensorWrapper input({1,71,71,192});
+ util::TensorWrapper output({1,35,35,192});
+
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 2;
+ int32_t stride_height = 2;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ input.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ output.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 0.f;
+ });
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bool bret = neon::maxPoolFloat32(input.ptr<float>(), input.shape(),
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ output.ptr<float>(), output.shape());
+ EXPECT_EQ(bret, true);
+
+ util::TensorWrapper expected({1,35,35,192});
+ expected.initValue([](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return 1.0f;
+ });
+
+ EXPECT_EQ(output, expected);
+}
+
+TEST(KernelACL_TC, neon_averagePoolFloat32_3x3to1x1)
+{
+ util::TensorWrapper input({1,3,3,1});
+ const nnfw::rt::Shape inputShape = { OperandType::FLOAT32, {1,3,3,1}, 1.0, 0 };
+ int32_t padding_left = 0;
+ int32_t padding_right = 0;
+ int32_t padding_top = 0;
+ int32_t padding_bottom = 0;
+ int32_t stride_width = 1;
+ int32_t stride_height = 1;
+ int32_t filter_width = 3;
+ int32_t filter_height = 3;
+
+ float outputData[1];
+ const nnfw::rt::Shape outputShape = { OperandType::FLOAT32, {1,1,1,1}, 1.0, 0 };
+ bool bret;
+
+ float value = 1.0f;
+ input.initValue([&value](uint32_t n, uint32_t c, uint32_t h, uint32_t w) {
+ return value++;
+ });
+
+ util::initData(outputData, sizeof(outputData) / sizeof(outputData[0]), 0.0);
+
+ int32_t activation = ANEURALNETWORKS_FUSED_NONE;
+
+ bret = neon::averagePoolFloat32(input.ptr<float>(), inputShape,
+ padding_left, padding_right,
+ padding_top, padding_bottom,
+ stride_width, stride_height,
+ filter_width, filter_height,
+ activation,
+ outputData, outputShape);
+ EXPECT_EQ(bret, true);
+