summaryrefslogtreecommitdiff
path: root/libs
diff options
context:
space:
mode:
Diffstat (limited to 'libs')
-rw-r--r--libs/.FORMATCHECKED0
-rw-r--r--libs/ARMComputeEx/CMakeLists.txt21
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h245
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h79
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h78
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h106
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h81
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h58
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h62
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h96
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h61
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h58
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h113
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h57
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h104
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h129
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h55
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h81
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h59
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h60
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h73
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h125
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h104
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h69
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h58
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h59
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h142
-rw-r--r--libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h653
-rw-r--r--libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h113
-rw-r--r--libs/ARMComputeEx/arm_compute/core/TypesEx.h100
-rw-r--r--libs/ARMComputeEx/arm_compute/core/UtilsEx.h37
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h63
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h114
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h62
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h45
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h41
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h50
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h42
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h44
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h54
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h38
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h60
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h59
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h39
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h77
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h40
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h47
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h51
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h80
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h87
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h56
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h44
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h40
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h58
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h137
-rw-r--r--libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h83
-rw-r--r--libs/ARMComputeEx/resolve_includes.py102
-rw-r--r--libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp409
-rw-r--r--libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp123
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl89
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl94
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl74
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl126
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl70
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl84
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl146
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl86
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl93
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl69
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl84
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl57
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl98
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl88
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h352
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h406
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl48
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl86
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl72
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl88
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl80
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl111
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl74
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl88
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl152
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl163
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl69
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl75
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl63
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl103
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl130
-rw-r--r--libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl271
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp211
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp159
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp216
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp117
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp173
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp102
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp212
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp109
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp114
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp77
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp129
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp177
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp89
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp166
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp185
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp149
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp126
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp280
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp181
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp238
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp113
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp170
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp253
-rw-r--r--libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp468
-rw-r--r--libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp294
-rw-r--r--libs/ARMComputeEx/src/core/UtilsEx.cpp34
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp35
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp120
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp46
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp39
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp40
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp29
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp34
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp29
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp50
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp39
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp36
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp49
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp123
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp29
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp28
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp39
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp30
-rw-r--r--libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp302
-rw-r--r--libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp74
-rw-r--r--libs/ARMComputeEx/src/runtime/topk_v2.h191
-rw-r--r--libs/CMakeLists.txt4
-rw-r--r--libs/cpp14/CMakeLists.txt2
-rw-r--r--libs/cpp14/include/cpp14/memory.h29
-rw-r--r--libs/misc/CMakeLists.txt13
-rw-r--r--libs/misc/examples/tensor_index_iterator.cpp74
-rw-r--r--libs/misc/include/misc/EnvVar.h107
-rw-r--r--libs/misc/include/misc/benchmark.h87
-rw-r--r--libs/misc/include/misc/environment.h130
-rw-r--r--libs/misc/include/misc/feature/Index.h137
-rw-r--r--libs/misc/include/misc/feature/IndexIterator.h105
-rw-r--r--libs/misc/include/misc/feature/Object.h117
-rw-r--r--libs/misc/include/misc/feature/Reader.h69
-rw-r--r--libs/misc/include/misc/feature/Shape.h77
-rw-r--r--libs/misc/include/misc/feature/TextFormatter.h116
-rw-r--r--libs/misc/include/misc/fp32.h99
-rw-r--r--libs/misc/include/misc/kernel/IndexIterator.h102
-rw-r--r--libs/misc/include/misc/kernel/RandomObject.h77
-rw-r--r--libs/misc/include/misc/kernel/Reader.h60
-rw-r--r--libs/misc/include/misc/kernel/Shape.h68
-rw-r--r--libs/misc/include/misc/matrix/IndexIterator.h99
-rw-r--r--libs/misc/include/misc/matrix/Reader.h59
-rw-r--r--libs/misc/include/misc/matrix/Shape.h63
-rw-r--r--libs/misc/include/misc/tensor/Comparator.h95
-rw-r--r--libs/misc/include/misc/tensor/Diff.h70
-rw-r--r--libs/misc/include/misc/tensor/Index.h105
-rw-r--r--libs/misc/include/misc/tensor/IndexEnumerator.h131
-rw-r--r--libs/misc/include/misc/tensor/IndexFormatter.h75
-rw-r--r--libs/misc/include/misc/tensor/IndexIterator.h107
-rw-r--r--libs/misc/include/misc/tensor/NonIncreasingStride.h83
-rw-r--r--libs/misc/include/misc/tensor/Object.h100
-rw-r--r--libs/misc/include/misc/tensor/Reader.h58
-rw-r--r--libs/misc/include/misc/tensor/Shape.h152
-rw-r--r--libs/misc/include/misc/tensor/Zipper.h104
-rw-r--r--libs/misc/include/misc/vector.h52
-rw-r--r--libs/misc/include/misc/vector/Object.h92
-rw-r--r--libs/misc/include/misc/vector/Reader.h58
-rw-r--r--libs/misc/src/environment.cpp95
-rw-r--r--libs/misc/src/tensor/Comparator.cpp40
-rw-r--r--libs/misc/src/tensor/IndexFormatter.cpp49
-rw-r--r--libs/misc/src/tensor/NonIncreasingStride.cpp46
-rw-r--r--libs/misc/src/tensor/Shape.cpp99
-rw-r--r--libs/profiling/CMakeLists.txt5
-rw-r--r--libs/profiling/include/profiling/profile_buffer.h170
-rw-r--r--libs/profiling/include/profiling/profiler.h203
-rw-r--r--libs/profiling/include/profiling/profiling.h57
-rw-r--r--libs/profiling/include/profiling/time.h35
-rw-r--r--libs/profiling/src/profiling/time.cpp55
-rw-r--r--libs/tflite/CMakeLists.txt12
-rw-r--r--libs/tflite/include/tflite/Assert.h45
-rw-r--r--libs/tflite/include/tflite/Diff.h199
-rw-r--r--libs/tflite/include/tflite/FeatureView.h106
-rw-r--r--libs/tflite/include/tflite/InputIndex.h60
-rw-r--r--libs/tflite/include/tflite/InterpreterSession.h99
-rw-r--r--libs/tflite/include/tflite/NNAPISession.h101
-rw-r--r--libs/tflite/include/tflite/OutputIndex.h60
-rw-r--r--libs/tflite/include/tflite/Quantization.h44
-rw-r--r--libs/tflite/include/tflite/Session.h69
-rw-r--r--libs/tflite/include/tflite/TensorLogger.h168
-rw-r--r--libs/tflite/include/tflite/TensorShapeUtils.h64
-rw-r--r--libs/tflite/include/tflite/TensorUtils.h54
-rw-r--r--libs/tflite/include/tflite/TensorView.h120
-rw-r--r--libs/tflite/include/tflite/ext/kernels/Abs.h41
-rw-r--r--libs/tflite/include/tflite/ext/kernels/CustomOps.h60
-rw-r--r--libs/tflite/include/tflite/ext/kernels/SquaredDifference.h76
-rw-r--r--libs/tflite/include/tflite/ext/kernels/TensorFlowMax.h75
-rw-r--r--libs/tflite/include/tflite/ext/kernels/TensorFlowSum.h41
-rw-r--r--libs/tflite/include/tflite/ext/kernels/register.h42
-rw-r--r--libs/tflite/include/tflite/ext/nnapi_delegate.h97
-rw-r--r--libs/tflite/include/tflite/interp/Builder.h53
-rw-r--r--libs/tflite/include/tflite/interp/FlatBufferBuilder.h64
-rw-r--r--libs/tflite/include/tflite/interp/FunctionBuilder.h67
-rw-r--r--libs/tflite/src/Diff.cpp598
-rw-r--r--libs/tflite/src/FeatureView.cpp70
-rw-r--r--libs/tflite/src/Quantization.cpp22
-rw-r--r--libs/tflite/src/TensorShapeUtils.cpp48
-rw-r--r--libs/tflite/src/TensorView.test.cpp53
-rw-r--r--libs/tflite/src/ext/kernels/Abs.cpp103
-rw-r--r--libs/tflite/src/ext/kernels/SquaredDifference.cpp112
-rw-r--r--libs/tflite/src/ext/kernels/TensorFlowMax.cpp405
-rw-r--r--libs/tflite/src/ext/kernels/TensorFlowSum.cpp400
-rw-r--r--libs/tflite/src/ext/kernels/register.cpp221
-rw-r--r--libs/tflite/src/ext/nnapi_delegate.cpp1209
-rw-r--r--libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc106
-rw-r--r--libs/tflite/src/interp/FlatBufferBuilder.cpp40
-rw-r--r--libs/tflite/src/interp/FunctionBuilder.cpp34
228 files changed, 0 insertions, 24110 deletions
diff --git a/libs/.FORMATCHECKED b/libs/.FORMATCHECKED
deleted file mode 100644
index e69de29bb..000000000
--- a/libs/.FORMATCHECKED
+++ /dev/null
diff --git a/libs/ARMComputeEx/CMakeLists.txt b/libs/ARMComputeEx/CMakeLists.txt
deleted file mode 100644
index 2483fb55d..000000000
--- a/libs/ARMComputeEx/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# ARMComputeEx is skipped entirely when targeting x86_64.
-if("${TARGET_ARCH}" STREQUAL "x86_64")
- return()
-endif()
-
-nnfw_find_package(ARMCompute REQUIRED)
-
-set(ACL_EX_BASE ${CMAKE_SOURCE_DIR}/libs/ARMComputeEx)
-
-file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp")
-
-# Generate embedded cl_kernel sources at configure time.
-# The script is invoked directly (no intermediate shell) and its exit status is
-# captured so that a failed generation step is reported instead of being ignored.
-execute_process (
- WORKING_DIRECTORY "${ACL_EX_BASE}"
- COMMAND python resolve_includes.py
- RESULT_VARIABLE ACL_EX_RESOLVE_RESULT
-)
-# RESULT_VARIABLE holds either the exit code or an error string, so compare as text.
-# NOTE(review): kept as a WARNING to preserve the previous "continue anyway" behaviour;
-# consider FATAL_ERROR once confirmed that the build cannot succeed without the output.
-if(NOT "${ACL_EX_RESOLVE_RESULT}" STREQUAL "0")
- message(WARNING "resolve_includes.py failed (${ACL_EX_RESOLVE_RESULT}); embedded CL kernels may be missing or stale")
-endif()
-
-add_library(arm_compute_ex SHARED ${ACL_EX_SRCS})
-set_target_properties(arm_compute_ex PROPERTIES COMPILE_FLAGS "-DEMBEDDED_KERNELS=1")
-target_include_directories(arm_compute_ex PUBLIC ${ACL_EX_BASE})
-target_link_libraries(arm_compute_ex arm_compute_core)
-install(TARGETS arm_compute_ex DESTINATION lib)
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h b/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
deleted file mode 100644
index e4e752ef9..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/CLKernelLibraryEx.h
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLKernelLibraryEx.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file is a cloned version of CLKernelLibrary.h in ACL. This file defines
- * an interface for CLKernelLibrary.cpp which adds more OpenCL kernels on top of ACL.
- */
-
-#ifndef __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
-#define __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__
-
-#include "arm_compute/core/CL/OpenCL.h"
-
-#include <map>
-#include <set>
-#include <string>
-#include <utility>
-
-namespace arm_compute
-{
-
-/**
- * @brief Class to build OpenCL kernels added from nnfw.
- *
- * The constructor is private and copying is deleted; the instance is obtained through get().
- */
-class CLKernelLibraryEx
-{
- using StringSet = std::set<std::string>;
-
-private:
- /**
- * @brief Construct a new CLKernelLibraryEx object
- */
- CLKernelLibraryEx();
-
-public:
- /**
- * @brief Prevent instances of this class from being copied.
- */
- CLKernelLibraryEx(const CLKernelLibraryEx &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copy-assigned.
- */
- CLKernelLibraryEx &operator=(const CLKernelLibraryEx &) = delete;
-
- /**
- * @brief Get the KernelLibrary singleton.
- * @return The KernelLibrary instance
- */
- static CLKernelLibraryEx &get();
-
- /**
- * @brief Initialise the kernel library.
- * @param[in] kernel_path Path of the directory from which kernel sources are loaded.
- * @param[in] context CL context used to create programs.
- * @param[in] device CL device for which the programs are created.
- * @return N/A
- */
- void init(std::string kernel_path, cl::Context context, cl::Device device)
- {
- _kernel_path = std::move(kernel_path);
- _context = std::move(context);
- _device = std::move(device);
- }
-
- /**
- * @brief Set the path that the kernels reside in.
- * @param[in] kernel_path Path of the directory from which kernel sources are loaded.
- * @return N/A
- */
- void set_kernel_path(const std::string &kernel_path) { _kernel_path = kernel_path; }
-
- /**
- * @brief Get the path that the kernels reside in.
- * @return A copy of the path of the kernel files
- */
- std::string get_kernel_path() const { return _kernel_path; }
-
- /**
- * @brief Get the source of the selected program.
- * @param[in] program_name Program name.
- * @return Source of the selected program.
- */
- std::string get_program_source(const std::string &program_name);
-
- /**
- * @brief Set the CL context used to create programs.
- * @note Setting the context also resets the device to the
- * first one available in the new context. If the context is null or
- * reports no devices, the device is reset to a default-constructed one.
- * @param[in] context A CL context.
- * @return N/A
- */
- void set_context(cl::Context context)
- {
- _context = std::move(context);
- if (_context.get() == nullptr)
- {
- _device = cl::Device();
- }
- else
- {
- const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
-
- if (cl_devices.empty())
- {
- _device = cl::Device();
- }
- else
- {
- _device = cl_devices[0];
- }
- }
- }
-
- /**
- * @brief Return associated CL context.
- * @return A CL context.
- */
- cl::Context &context() { return _context; }
-
- /**
- * @brief Set the CL device for which the programs are created.
- * @param[in] device A CL device.
- * @return N/A
- */
- void set_device(cl::Device device) { _device = std::move(device); }
-
- /**
- * @brief Gets the CL device for which the programs are created.
- * @return A CL device.
- */
- cl::Device &get_device() { return _device; }
-
- /**
- * @brief Return the device version
- * @return The content of CL_DEVICE_VERSION
- */
- std::string get_device_version();
-
- /**
- * @brief Create a kernel from the kernel library.
- * @param[in] kernel_name Kernel name.
- * @param[in] build_options_set Kernel build options as a set.
- * @return The created kernel.
- */
- Kernel create_kernel(const std::string &kernel_name,
- const StringSet &build_options_set = {}) const;
-
- /**
- * @brief Find the maximum number of local work items in a workgroup that can be supported
- * for the kernel.
- * @param[in] kernel kernel object
- */
- size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
-
- /**
- * @brief Return the default NDRange for the device.
- * @return default NDRange of the device
- */
- cl::NDRange default_ndrange() const;
-
- /**
- * @brief Clear the library's cache of binary programs
- * @return N/A
- */
- void clear_programs_cache()
- {
- _programs_map.clear();
- _built_programs_map.clear();
- }
-
- /**
- * @brief Access the cache of built OpenCL programs
- * @return Map whose key is the name of a built program and whose value is
- * the corresponding built cl::Program.
- */
- const std::map<std::string, cl::Program> &get_built_programs() const
- {
- return _built_programs_map;
- }
-
- /**
- * @brief Add a new built program to the cache
- * @param[in] built_program_name Name of the program
- * @param[in] program Built program to add to the cache
- * @return N/A
- */
- void add_built_program(const std::string &built_program_name, cl::Program program);
-
- /**
- * @brief Returns true if FP16 is supported by the CL device
- * @return true if the CL device supports FP16
- */
- bool fp16_supported() const;
-
- /**
- * @brief Returns true if int64_base_atomics extension is supported by the CL device
- * @return true if the CL device supports int64_base_atomics extension
- */
- bool int64_base_atomics_supported() const;
-
-private:
- /**
- * @brief Load program and its dependencies.
- * @param[in] program_name Name of the program to load.
- */
- const Program &load_program(const std::string &program_name) const;
- /**
- * @brief Concatenates contents of a set into a single string.
- * @param[in] s Input set to concatenate.
- * @return Concatenated string.
- */
- std::string stringify_set(const StringSet &s) const;
-
- cl::Context _context; /**< Underlying CL context. */
- cl::Device _device; /**< Underlying CL device. */
- std::string _kernel_path; /**< Path to the kernels folder. */
- mutable std::map<std::string, const Program>
- _programs_map; /**< Map with all already loaded program data. */
- mutable std::map<std::string, cl::Program>
- _built_programs_map; /**< Map with all already built program data. */
- static const std::map<std::string, std::string>
- _kernel_program_map; /**< Map that associates kernel names with programs. */
- static const std::map<std::string, std::string>
- _program_source_map; /**< Contains sources for all programs.
- Used for compile-time kernel inclusion. */
-};
-}
-#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_EX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h b/libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h
deleted file mode 100644
index dbda354d6..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/OpenCLEx.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_OPENCLEX_H__
-#define __ARM_COMPUTE_OPENCLEX_H__
-
-#include <string>
-#include <utility>
-
-/* Configure the Khronos C++ wrapper: target the OpenCL 1.1 API and build programs as OpenCL C 1.2 by default: */
-#ifndef ARM_COMPUTE_NO_EXCEPTIONS
-#define CL_HPP_ENABLE_EXCEPTIONS
-#endif // ARM_COMPUTE_NO_EXCEPTIONS
-#define CL_HPP_CL_1_2_DEFAULT_BUILD
-#define CL_HPP_TARGET_OPENCL_VERSION 110
-#define CL_HPP_MINIMUM_OPENCL_VERSION 110
-#include <CL/cl2.hpp>
-
-namespace arm_compute
-{
-/** Class for loading OpenCL symbols.
- *
- * The constructor is private; the instance is obtained through get().
- * Each symbol is exposed as a public std::function member named <function>_ptr.
- */
-class CLSymbolsEx final
-{
-private:
- CLSymbolsEx() = default;
- /** Load the symbols declared below from an already opened library.
- *
- * @param[in] handle Library handle (presumably the one obtained when opening the
- * OpenCL library; confirm in OpenCLEx.cpp).
- */
- void load_symbols(void *handle);
-
-public:
- /** Get the static instance of CLSymbolsEx.
- *
- * @return The static instance of CLSymbolsEx.
- */
- static CLSymbolsEx &get();
- /** Load symbols from the given OpenCL library path.
- *
- * @param[in] library Path to the OpenCL library.
- *
- * @return True if loading the library is successful.
- */
- bool load(const std::string &library);
- /** Load symbols from any of the default OpenCL library names.
- *
- * @return True if loading any library is successful.
- */
- bool load_default();
-
-/* Declares a member `<func_name>_ptr` whose type is a std::function matching the
- * signature of the OpenCL function `func_name`, initialised to nullptr (empty). */
-#define DECLARE_FUNCTION_PTR(func_name) std::function<decltype(func_name)> func_name##_ptr = nullptr
-
- DECLARE_FUNCTION_PTR(clGetEventInfo);
- DECLARE_FUNCTION_PTR(clSetEventCallback);
-
-#undef DECLARE_FUNCTION_PTR
-
-private:
- /* NOTE(review): meaning of the two flags is not visible in this header; presumably
- * (load attempted, load succeeded). Confirm against OpenCLEx.cpp. */
- std::pair<bool, bool> _loaded{false, false};
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_OPENCLEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h
deleted file mode 100644
index 080cc47ef..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLActivationLayerExKernel.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__
-#define __ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the activation layer kernel configured with an ActivationLayerInfoEx. */
-class CLActivationLayerExKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLActivationLayerExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLActivationLayerExKernel(const CLActivationLayerExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLActivationLayerExKernel &operator=(const CLActivationLayerExKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLActivationLayerExKernel(CLActivationLayerExKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLActivationLayerExKernel &operator=(CLActivationLayerExKernel &&) = default;
- /** Default destructor */
- ~CLActivationLayerExKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will
- * store the result of the activation function.
- * Data types supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- */
- void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfoEx act_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLActivationLayerExKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
- * will store the result of the activation function.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfoEx &act_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- // NOTE(review): the members below are assigned outside this header; the descriptions are
- // inferred from the configure() documentation above and should be confirmed in the .cpp.
- ICLTensor *_input; /**< Source tensor pointer (presumably the @p input given to configure()) */
- ICLTensor *_output; /**< Destination tensor pointer (presumably the @p output given to configure()) */
- bool _run_in_place; /**< Presumably true when the activation is performed in-place */
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLACTIVATIONLAYEREXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h
deleted file mode 100644
index b91a26159..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArgMinMaxKernel.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLArgMinMaxKernel.h
- * @brief This file defines CLArgMinMaxKernel
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__
-#define __ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define the interface for the argminmax kernel.
- */
-class CLArgMinMaxKernel : public ICLKernel
-{
-public:
- /**
- * @brief Default constructor.
- */
- CLArgMinMaxKernel();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLArgMinMaxKernel to be copied
- */
- CLArgMinMaxKernel(const CLArgMinMaxKernel &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLArgMinMaxKernel to be copied
- * @return Reference of this instance
- */
- CLArgMinMaxKernel &operator=(const CLArgMinMaxKernel &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLArgMinMaxKernel to be moved
- */
- CLArgMinMaxKernel(CLArgMinMaxKernel &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLArgMinMaxKernel to be moved
- * @return Reference of this instance
- */
- CLArgMinMaxKernel &operator=(CLArgMinMaxKernel &&) = default;
- /**
- * @brief Initialise the kernel's input, output, axis and operation.
- * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The output tensor, Data types supported: same as @p input.
- * @param[in] argminmax_axis Axis to argminmax
- * @param[in] op ArgOperation value selecting the operation (see its declaration; not shown here)
- */
- void configure(const ICLTensor *input, ICLTensor *output, const uint32_t argminmax_axis,
- ArgOperation op);
- /**
- * @brief Check whether the given info leads to a valid configuration of @ref CLArgMinMaxKernel
- * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] output The output tensor info, Data types supported: same as @p input.
- * @param[in] argminmax_axis Axis to argminmax
- * @param[in] op ArgOperation value selecting the operation (see its declaration; not shown here)
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t argminmax_axis, ArgOperation op);
-
- /**
- * @brief Run CLArgMinMaxKernel op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
- /**
- * @brief Run CLArgMinMaxKernel op on CPU
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run_on_cpu(cl::CommandQueue &queue);
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _argminmax_axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLARG_MIN_MAX_KERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h
deleted file mode 100644
index 9a765f310..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__
-#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the arithmetic subtraction kernel (supports broadcasting)
- *
- * Arithmetic subtraction is computed by:
- * @f[ output(x,y) = input1(x,y) - input2(x,y) @f]
- */
-class CLArithmeticSubtractionExKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLArithmeticSubtractionExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLArithmeticSubtractionExKernel(const CLArithmeticSubtractionExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLArithmeticSubtractionExKernel &operator=(const CLArithmeticSubtractionExKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLArithmeticSubtractionExKernel(CLArithmeticSubtractionExKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLArithmeticSubtractionExKernel &operator=(CLArithmeticSubtractionExKernel &&) = default;
- /** Default destructor */
- ~CLArithmeticSubtractionExKernel() = default;
-
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in] input1 First tensor input. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 Second tensor input. Data types supported: U8/S16/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8),
- * S16/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
- ConvertPolicy policy);
- /** Static function to check if the given info will lead to a valid configuration of @ref
- * CLArithmeticSubtractionExKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8),
- * S16/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, ConvertPolicy policy);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h
deleted file mode 100644
index 1387897c9..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__
-#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform the BATCH_TO_SPACE_ND operation */
-class CLBatchToSpaceNDKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchToSpaceNDKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceNDKernel(const CLBatchToSpaceNDKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceNDKernel &operator=(const CLBatchToSpaceNDKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceNDKernel(CLBatchToSpaceNDKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceNDKernel &operator=(CLBatchToSpaceNDKernel &&) = default;
- /** Default destructor */
- ~CLBatchToSpaceNDKernel() = default;
- /** Initialise the kernel's input, output and block size.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size Pointer to int32_t block size value(s); element count not shown here.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t *block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
deleted file mode 100644
index ab33d9d3a..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
-#define __ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to return truth values of two input tensors for Binary Logical Op */
-class CLBinaryLogicalOpKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBinaryLogicalOpKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLBinaryLogicalOpKernel(const CLBinaryLogicalOpKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLBinaryLogicalOpKernel &operator=(const CLBinaryLogicalOpKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBinaryLogicalOpKernel(CLBinaryLogicalOpKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBinaryLogicalOpKernel &operator=(CLBinaryLogicalOpKernel &&) = default;
- /** Initialize the kernel's inputs, output and logical operation.
- * @param[in] input1 Source tensor1.
- * @param[in] input2 Source tensor2.
- * @param[out] output Output tensor.
- * @param[in] op BinaryLogicalOperation to apply (values declared elsewhere; not shown here).
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
- BinaryLogicalOperation op);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLBINARYLOGICALOPKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
deleted file mode 100644
index 4c2feb903..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLCastKernel.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLCastKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLCastKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__
-#define __ARM_COMPUTE_CLCASTKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define the OpenCL kernel for the cast operation
- */
-class CLCastKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct CLCastKernel object
- */
- CLCastKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLCastKernel(const CLCastKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLCastKernel &operator=(const CLCastKernel &) = delete;
-
- /**
- * @brief Construct CLCastKernel object using default move constructor
- * @param[in] CLCastKernel object to move
- */
- CLCastKernel(CLCastKernel &&) = default;
-
- /**
- * @brief Allow instances of this class to be moved
- * @param[in] CLCastKernel object to move
- */
- CLCastKernel &operator=(CLCastKernel &&) = default;
-
- /**
- * @brief Destruct this CLCastKernel object
- */
- ~CLCastKernel() = default;
-
- /**
- * @brief Initialise the kernel's input and output.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h
deleted file mode 100644
index f5f455993..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLComparisonOpKernel.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__
-#define __ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to compare the values of two tensors using a ComparisonOperation */
-class CLComparisonOpKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLComparisonOpKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLComparisonOpKernel(const CLComparisonOpKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLComparisonOpKernel &operator=(const CLComparisonOpKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComparisonOpKernel(CLComparisonOpKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComparisonOpKernel &operator=(CLComparisonOpKernel &&) = default;
- /** Initialize the kernel's inputs, output and comparison operation.
- * @param[in] input1 Source tensor1.
- * @param[in] input2 Source tensor2.
- * @param[out] output Output tensor.
- * @param[in] op ComparisonOperation to apply (values declared elsewhere; not shown here).
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
- const ComparisonOperation &op);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLCOMPARISON_OP_KERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
deleted file mode 100644
index 60ec7a82a..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform the depth-to-space operation */
-class CLDepthToSpaceKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthToSpaceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default;
- /** Default destructor */
- ~CLDepthToSpaceKernel() = default;
- /** Initialise the kernel's input, output and block size.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size Block size, passed by value as an int32_t.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
deleted file mode 100644
index da075db69..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLEmbeddingLookupKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLEmbeddingLookupKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
-#define __ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to perform EmbeddingLookup operation with an OpenCL kernel
- */
-class CLEmbeddingLookupKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct a CLEmbeddingLookupKernel object
- */
- CLEmbeddingLookupKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLEmbeddingLookupKernel(const CLEmbeddingLookupKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLEmbeddingLookupKernel &operator=(const CLEmbeddingLookupKernel &) = delete;
-
- /**
- * @brief Construct a CLEmbeddingLookupKernel object by using default move constructor
- * @param[in] CLEmbeddingLookupKernel object to move
- */
- CLEmbeddingLookupKernel(CLEmbeddingLookupKernel &&) = default;
-
- /**
- * @brief Move assignment operator
- * @param[in] CLEmbeddingLookupKernel object to move
- */
- CLEmbeddingLookupKernel &operator=(CLEmbeddingLookupKernel &&) = default;
-
- /**
- * @brief Destruct this object
- */
- ~CLEmbeddingLookupKernel() = default;
-
- /**
- * @brief Set the input and output of the kernel
- * @param[in] input Source tensor.
- * Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] lookups 1D tensor whose values are indices into the first
- * dimension of input.
- * Data types supported: S32.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLEmbeddingLookupKernel
- * @param[in] input The input tensor info.
- * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[in] output The output tensor info, Data types supported: same as @p input.
- * @param[in] lookups Lookups info. Data types supported: S32.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *lookups);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- const ICLTensor *_lookups; /**< Lookups tensor */
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUPKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h
deleted file mode 100644
index a6ea539f8..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLExpKernel.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLEXPKERNEL_H__
-#define __ARM_COMPUTE_CLEXPKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform an exponential operation */
-class CLExpKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLExpKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLExpKernel(const CLExpKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLExpKernel &operator=(const CLExpKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLExpKernel(CLExpKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLExpKernel &operator=(CLExpKernel &&) = default;
- /** Default destructor */
- ~CLExpKernel() = default;
- /** Set the source and destination tensors of the kernel
- *
- * @param[in] input Source tensor. Data type supported: F32.
- * @param[out] output Destination tensor. Data type supported: F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor (presumably the one given to configure(); confirm) */
- ICLTensor *_output; /**< Destination tensor (presumably the one given to configure(); confirm) */
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLEXPKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h
deleted file mode 100644
index 7e35a80b0..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLGatherKernel.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLGatherKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLGatherKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLGATHERKERNEL_H__
-#define __ARM_COMPUTE_CLGATHERKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define an interface for the gather kernel.
- */
-class CLGatherKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct CLGatherKernel object
- */
- CLGatherKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- */
- CLGatherKernel(const CLGatherKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- */
- CLGatherKernel &operator=(const CLGatherKernel &) = delete;
-
- /**
- * @brief Construct CLGatherKernel object by using default move constructor
- * @param[in] CLGatherKernel object to move
- */
- CLGatherKernel(CLGatherKernel &&) = default;
-
- /**
- * @brief Move assignment operator
- * @param[in] CLGatherKernel object to move
- */
- CLGatherKernel &operator=(CLGatherKernel &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and output.
- * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
- * @param[in] input2 An input tensor. Data types supported: S32.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- * @return N/A
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLGatherKernel
- * @param[in] input1 An input tensor info. Data types supported: U8/S32/F32.
- * @param[in] input2 An input tensor info. Data types supported: S32.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGATHERKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
deleted file mode 100644
index c3fc15637..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLHashtableLookupKernel.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLHashtableLookupKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLHashtableLookupKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
-#define __ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
-* @brief Class to perform HashtableLookup operation with opencl kernel
-*/
-class CLHashtableLookupKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct a CLHashtableLookupKernel object
- * */
- CLHashtableLookupKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * */
- CLHashtableLookupKernel(const CLHashtableLookupKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * */
- CLHashtableLookupKernel &operator=(const CLHashtableLookupKernel &) = delete;
-
- /**
- * @brief Construct a CLHashtableLookupKernel object by using default move constructor
- * @param[in] CLHashtableLookupKernel object to move
- * */
- CLHashtableLookupKernel(CLHashtableLookupKernel &&) = default;
-
- /**
- * @brief Move assignment operator
- * @param[in] CLHashtableLookupKernel object to move
- * */
- CLHashtableLookupKernel &operator=(CLHashtableLookupKernel &&) = default;
-
- /**
- * @brief Destruct this object
- * */
- ~CLHashtableLookupKernel() = default;
-
- /**
- * @brief Set the input and output of the kernel
- * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
- * input.
- * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
- * Data types supported: S32
- * @param[in] input Source tensor.
- * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
- * input.
- * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
- * (True) or not (False). Data types supported: U8/QASYMM8
- * @return N/A
- */
- void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *input,
- ICLTensor *output, ICLTensor *hits);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLHashtableLookupKernel
- * @param[in] lookups The lookups tensor info. Data types supported: S32.
- * @param[in] keys The keys tensor info. keys and input pair represent a map.
- * Data types supported: S32
- * @param[in] input The input tensor info.
- * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output The output tensor. Data types and data layouts supported: Same as @p
- * input.
- * @param[out] hits The hits tensor info. A boolean tensor that indicates whether the lookup
- * hits
- * (True) or not (False). Data types supported: U8/QASYMM8
- * @return a status
- */
- static Status validate(const ITensorInfo *lookups, const ITensorInfo *keys,
- const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *hits);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_lookups; /** Lookups tensor */
- const ICLTensor *_keys; /** Keys tensor */
- const ICLTensor *_input; /** Source tensor */
- ICLTensor *_output; /** Destination tensor */
- ICLTensor *_hits; /** Hits tensor */
- std::unique_ptr<CLTensor> _lookup_indices{nullptr}; /** Lookup indices tensor */
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUPKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
deleted file mode 100644
index ccbea147e..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNegKernel.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLNEGKERNEL_H__
-#define __ARM_COMPUTE_CLNEGKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a negation operation on tensor*/
-class CLNegKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLNegKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLNegKernel(const CLNegKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLNegKernel &operator=(const CLNegKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLNegKernel(CLNegKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLNegKernel &operator=(CLNegKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLNEGKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h
deleted file mode 100644
index 181a6226a..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
-#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the normalization layer kernel.
- */
-class CLNormalizationLayerExKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLNormalizationLayerExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete;
- /** Default Move Constructor. */
- CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default;
- /** Default move assignment operator */
- CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported:
- * F16/F32.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as
- * input. Data types supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type,
- * normalization size and other parameters.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLNormalizationLayerKernel
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported:
- * F16/F32.
- * @param[in] output Destination tensor. Output will have the same number of dimensions as
- * input. Data types supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization
- * size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- BorderSize _border_size;
- bool _is_in_map;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
deleted file mode 100644
index eff1b8bd5..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPReLUKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
-#define __ARM_COMPUTE_CLPRELU_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to calculate PReLU*/
-class CLPReLUKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPReLUKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLPReLUKernel(const CLPReLUKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLPReLUKernel &operator=(const CLPReLUKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPReLUKernel(CLPReLUKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPReLUKernel &operator=(CLPReLUKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor1.
- * @param[in] alpha Source tensor2.
- * @param[out] output Output tensor.
- */
- void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_alpha;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
deleted file mode 100644
index cbaa2adee..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-#define __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform PAD operation */
-class CLPadLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPadLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel(const CLPadLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel(CLPadLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
- /** Default destructor */
- ~CLPadLayerKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] pad_size Padding Size tensor. Data types supported : S32
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- ICLTensor *_pad_size; /**< Padding Size tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYERKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h
deleted file mode 100644
index 3434deee8..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPermuteExKernel.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__
-#define __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform tensor permutation.
- *
- * Permutes given a permutation vector
- */
-class CLPermuteExKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPermuteExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteExKernel(const CLPermuteExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteExKernel &operator=(const CLPermuteExKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPermuteExKernel(CLPermuteExKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPermuteExKernel &operator=(CLPermuteExKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to permute. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLPermuteKernel
- *
- * @param[in] input First tensor input info. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PermutationVector _perm;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h
deleted file mode 100644
index d579f5d8f..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLPixelWiseDivisionKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLPixelWiseDivisionKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__
-#define __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Interface for the pixelwise division kernel.
- */
-class CLPixelWiseDivisionKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct a CLPixelWiseDivisionKernel object
- */
- CLPixelWiseDivisionKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- */
- CLPixelWiseDivisionKernel(const CLPixelWiseDivisionKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- */
- CLPixelWiseDivisionKernel &operator=(const CLPixelWiseDivisionKernel &) = delete;
-
- /**
- * @brief Construct a CLPixelWiseDivisionKernel object by using move constructor
- * @param[in] CLPixelWiseDivisionKernel object to move
- */
- CLPixelWiseDivisionKernel(CLPixelWiseDivisionKernel &&) = default;
-
- /**
- * @brief Allow instances of this class to be moved
- * @param[in] CLPixelWiseDivisionKernel object to move
- */
- CLPixelWiseDivisionKernel &operator=(CLPixelWiseDivisionKernel &&) = default;
-
- /**
- * @brief Initialise the kernel's input, output and border mode.
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note:
- * U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after division.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
- * even.
- * @return N/A
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivisionKernel
- * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after division.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale, ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- /**
- * @brief The size of the border for that kernel
- * @return The width in number of elements of the border.
- */
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
deleted file mode 100644
index a26a4a7fc..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLReduceOperationKernel.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLReduceOperationKernel.h
- * @brief This file defines CLReduceOperationKernel class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
-#define __ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define interface for the reduce operation kernel
- */
-class CLReduceOperationKernel : public ICLKernel
-{
-public:
- /**
- * @brief Default constructor
- */
- CLReduceOperationKernel();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLReduceOperationKernel(const CLReduceOperationKernel &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLReduceOperationKernel &operator=(const CLReduceOperationKernel &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- */
- CLReduceOperationKernel(CLReduceOperationKernel &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- */
- CLReduceOperationKernel &operator=(CLReduceOperationKernel &&) = default;
- /**
- * @brief Default destructor
- */
- ~CLReduceOperationKernel() = default;
-
- /**
- * @brief Set the input and output tensors.
- * @param[in] input Source tensor. Data types supported: U8/S32/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce.
- * @param[in] op Reduce operation to perform.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
- ReduceOperation op);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLReduceOperationKernel.
- * @param[in] input Source tensor info. Data types supported: U8/S32/F32.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce.
- * @param[in] op Reduce operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ReduceOperation op);
-
- /*
- * @brief Run CLReduceOperationKernel op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue CLQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLREDUCEOPERATIONKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
deleted file mode 100644
index 68534f1ab..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
-#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform SPACE_TO_BATCH_ND operation */
-class CLSpaceToBatchNDKernel final : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToBatchNDKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToBatchNDKernel(const CLSpaceToBatchNDKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToBatchNDKernel &operator=(const CLSpaceToBatchNDKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToBatchNDKernel(CLSpaceToBatchNDKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToBatchNDKernel &operator=(CLSpaceToBatchNDKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToBatchNDKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @note The data layout of input and output must be the same.
- * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
- * are height and width.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
- * Data layout supported: NCHW/NHWC
- * @param[in] block_size Block size tensor. Data types supported: S32.
- * @param[in] padding_size Padding size tensor. Data types supported: S32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
- * Data layout supported: NCHW/NHWC
- */
- void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
- ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_block_size; /**< Block size tensor */
- const ICLTensor *_padding_size; /**< Padding size tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-
-#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
deleted file mode 100644
index be845a549..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
-#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform space-to-depth operation */
-class CLSpaceToDepthKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToDepthKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToDepthKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h
deleted file mode 100644
index a4c44e35d..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__
-#define __ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to return squared difference value of two tensors (x-y)^2 */
-class CLSquaredDifferenceKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSquaredDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLSquaredDifferenceKernel(const CLSquaredDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLSquaredDifferenceKernel &operator=(const CLSquaredDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSquaredDifferenceKernel(CLSquaredDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSquaredDifferenceKernel &operator=(CLSquaredDifferenceKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input1 Source tensor1.
- * @param[in] input2 Source tensor2.
- * @param[out] output Output tensor.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLSQUARED_DIFFERENCE_KERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h
deleted file mode 100644
index 6368c380e..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLStridedSliceExKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLStridedSliceExKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
-* @brief Class to define an interface for the kernel to extract a strided slice of a tensor
-*/
-class CLStridedSliceExKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct a CLStridedSliceExKernel object
- * */
- CLStridedSliceExKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * */
- CLStridedSliceExKernel(const CLStridedSliceExKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * */
- CLStridedSliceExKernel &operator=(const CLStridedSliceExKernel &) = delete;
-
- /**
- * @brief Construct a CLStridedSliceExKernel object by using default move constructor
- * @param[in] CLStridedSliceExKernel object to move
- * */
- CLStridedSliceExKernel(CLStridedSliceExKernel &&) = default;
-
- /**
- * @brief Move assignment operator
- * @param[in] CLStridedSliceExKernel object to move
- * */
- CLStridedSliceExKernel &operator=(CLStridedSliceExKernel &&) = default;
-
- /**
- * @brief Destruct this object
- * */
- ~CLStridedSliceExKernel() = default;
-
- /**
- * @brief Set the input and output of the kernel
- * @param[in] input Source tensor. Data type supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] beginData The begin tensor. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] endData The end tensor. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] stridesData The stride tensor. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] beginMask Mask for begin
- * @param[in] endMask Mask for end
- * @param[in] shrinkAxisMask Mask for shrink axis.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLStridedSliceExKernel
- * @param[in] input The input tensor info. Data types supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[in] output The output tensor info. Data types supported: same as @p input.
- * @param[in] begin The begin tensor info. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] end The end tensor info. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] stride The stride tensor info. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] beginMask Mask for begin
- * @param[in] endMask Mask for end
- * @param[in] shrinkAxisMask Mask for shrink axis.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *begin, const ITensorInfo *end,
- const ITensorInfo *stride, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- ICLTensor *_beginData; /**< Start indices of input tensor */
- ICLTensor *_endData; /**< Stop indices of input tensor */
- ICLTensor *_stridesData; /**< Strides tensor */
- int32_t _beginMask; /**< Begin mask */
- int32_t _endMask; /**< End mask */
- int32_t _shrinkAxisMask; /**< Shrink axis mask */
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
deleted file mode 100644
index eb2bad254..000000000
--- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLTopKV2Kernel.h
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLTopKV2Kernel.h
- * @brief This file defines classes for TopKV2Kernel
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLTOPKV2KERNEL_H__
-#define __ARM_COMPUTE_CLTOPKV2KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-// these parameters can be changed
-#define _ITEMS 16 // number of items in a group
-#define _GROUPS 4 // the number of virtual processors is _ITEMS * _GROUPS
-#define _HISTOSPLIT (_ITEMS * _GROUPS / 2) // number of splits of the histogram
-#define PERMUT // store the final permutation
-////////////////////////////////////////////////////////
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define CLTopKV2Single
- */
-class CLTopKV2Single : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLTopKV2Single();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
- */
- CLTopKV2Single(const CLTopKV2Single &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2Single to be copied
- * @return Reference of this instance
- */
- CLTopKV2Single &operator=(const CLTopKV2Single &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
- */
- CLTopKV2Single(CLTopKV2Single &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2Single to be moved
- * @return Reference of this instance
- */
- CLTopKV2Single &operator=(CLTopKV2Single &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[in] input An input tensor
- * @param[in] topk_values Values of the top k predictions
- * @param[in] topk_indices Indices of the top k predictions
- * @param[in] indices Indices
- * @param[in] temp_stack Temp stack
- * @param[in] k K of the top k predictions
- * @param[in] n Number of times to quick-sort
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices,
- cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n);
-
- /**
- * @brief Run CLTopKV2Single op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_topk_values;
- ICLTensor *_topk_indices;
-};
-
-/**
- * @brief Class to define CLTopKV2Init
- */
-class CLTopKV2Init : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLTopKV2Init();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
- */
- CLTopKV2Init(const CLTopKV2Init &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2Init to be copied
- * @return Reference of this instance
- */
- CLTopKV2Init &operator=(const CLTopKV2Init &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
- */
- CLTopKV2Init(CLTopKV2Init &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2Init to be moved
- * @return Reference of this instance
- */
- CLTopKV2Init &operator=(CLTopKV2Init &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[in] input An input tensor
- * @param[in] in_key_buf Buffer of input key
- * @param[in] in_ind_buf Buffer of input index
- * @param[in] n Number of times to quick-sort
- * @return N/A
- */
- void configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf, int n);
-
- /**
- * @brief Run CLTopKV2Init op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
-};
-
-/**
- * @brief Class to define CLRadixSortHistogram
- */
-class CLRadixSortHistogram : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLRadixSortHistogram();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
- */
- CLRadixSortHistogram(const CLRadixSortHistogram &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortHistogram to be copied
- * @return Reference of this instance
- */
- CLRadixSortHistogram &operator=(const CLRadixSortHistogram &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
- */
- CLRadixSortHistogram(CLRadixSortHistogram &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortHistogram to be moved
- * @return Reference of this instance
- */
- CLRadixSortHistogram &operator=(CLRadixSortHistogram &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] hist_buf Buffer of histogram
- * @param[in] bits Number of bits to be used for radix sort
- * @param[in] n Integer number size to sort
- * @return N/A
- */
- void configure(cl::Buffer *hist_buf, int bits, int n);
-
- /**
- * @brief Set pass
- * @param[in] pass Passes made in the radix sort algorithm
- * @param[in] in_key_buf Buffer of input key
- * @return N/A
- */
- void setPass(int pass, cl::Buffer *in_key_buf)
- {
- _pass = pass;
- _in_key_buf = in_key_buf;
- }
-
- /**
- * @brief Run CLRadixSortHistogram op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- int _pass;
- cl::Buffer *_in_key_buf;
-};
-
-/**
- * @brief Class to define CLRadixSortScanHistogram
- */
-class CLRadixSortScanHistogram : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLRadixSortScanHistogram();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
- */
- CLRadixSortScanHistogram(const CLRadixSortScanHistogram &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortScanHistogram to be copied
- * @return Reference of this instance
- */
- CLRadixSortScanHistogram &operator=(const CLRadixSortScanHistogram &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
- */
- CLRadixSortScanHistogram(CLRadixSortScanHistogram &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortScanHistogram to be moved
- * @return Reference of this instance
- */
- CLRadixSortScanHistogram &operator=(CLRadixSortScanHistogram &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] hist_buf Buffer of histogram
- * @param[out] glob_sum_buf Buffer of global sum
- * @param[in] bits Number of bits to be used for radix sort
- * @return N/A
- */
- void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
-
- /**
- * @brief Run CLRadixSortScanHistogram op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/**
- * @brief Class to define CLRadixSortGlobalScanHistogram
- */
-class CLRadixSortGlobalScanHistogram : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLRadixSortGlobalScanHistogram();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
- */
- CLRadixSortGlobalScanHistogram(const CLRadixSortGlobalScanHistogram &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortGlobalScanHistogram to be copied
- * @return Reference of this instance
- */
- CLRadixSortGlobalScanHistogram &operator=(const CLRadixSortGlobalScanHistogram &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
- */
- CLRadixSortGlobalScanHistogram(CLRadixSortGlobalScanHistogram &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortGlobalScanHistogram to be moved
- * @return Reference of this instance
- */
- CLRadixSortGlobalScanHistogram &operator=(CLRadixSortGlobalScanHistogram &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] glob_sum_buf Buffer of global sum
- * @param[out] temp_buf Temp buffer to be used while RadixSortGlobalScanHistogram
- * @param[in] bits Number of bits to be used for radix sort
- * @return N/A
- */
- void configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf, int bits);
-
- /**
- * @brief Run CLRadixSortGlobalScanHistogram op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/**
- * @brief Class to define CLRadixSortPasteHistogram
- */
-class CLRadixSortPasteHistogram : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLRadixSortPasteHistogram();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
- */
- CLRadixSortPasteHistogram(const CLRadixSortPasteHistogram &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortPasteHistogram to be copied
- * @return Reference of this instance
- */
- CLRadixSortPasteHistogram &operator=(const CLRadixSortPasteHistogram &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
- */
- CLRadixSortPasteHistogram(CLRadixSortPasteHistogram &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortPasteHistogram to be moved
- * @return Reference of this instance
- */
- CLRadixSortPasteHistogram &operator=(CLRadixSortPasteHistogram &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] hist_buf Buffer of histogram
- * @param[out] glob_sum_buf Buffer of global sum
- * @param[in] bits Number of bits to be used for radix sort
- * @return N/A
- */
- void configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits);
-
- /**
- * @brief Run CLRadixSortPasteHistogram op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/**
- * @brief Class to define CLRadixSortReorder
- */
-class CLRadixSortReorder : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLRadixSortReorder();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied
- */
- CLRadixSortReorder(const CLRadixSortReorder &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLRadixSortReorder to be copied
- * @return Reference of this instance
- */
- CLRadixSortReorder &operator=(const CLRadixSortReorder &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
- */
- CLRadixSortReorder(CLRadixSortReorder &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLRadixSortReorder to be moved
- * @return Reference of this instance
- */
- CLRadixSortReorder &operator=(CLRadixSortReorder &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] hist_buf Buffer of histogram
- * @param[in] bits Number of bits to be used for radix sort
- * @param[in] n Integer number size to sort
- * @return N/A
- */
- void configure(cl::Buffer *hist_buf, int bits, int n);
-
- /**
- * @brief Set pass
- * @param[in] pass Passes made in the radix sort algorithm
- * @param[in] in_key_buf Buffer of input key
- * @param[out] out_key_buf Buffer of output key
- * @param[in] in_ind_buf Buffer of input index
- * @param[out] out_ind_buf Buffer of output index
- * @return N/A
- */
- void setPass(int pass, cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
- cl::Buffer *out_ind_buf)
- {
- _pass = pass;
- _in_key_buf = in_key_buf;
- _out_key_buf = out_key_buf;
- _in_ind_buf = in_ind_buf;
- _out_ind_buf = out_ind_buf;
- }
- /**
- * @brief Run CLRadixSortReorder op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- int _pass;
- cl::Buffer *_in_key_buf;
- cl::Buffer *_out_key_buf;
- cl::Buffer *_in_ind_buf;
- cl::Buffer *_out_ind_buf;
-};
-
-/**
- * @brief Class to define CLTopKV2FindFirstNegative
- */
-class CLTopKV2FindFirstNegative : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLTopKV2FindFirstNegative();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied
- */
- CLTopKV2FindFirstNegative(const CLTopKV2FindFirstNegative &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2FindFirstNegative to be copied
- * @return Reference of this instance
- */
- CLTopKV2FindFirstNegative &operator=(const CLTopKV2FindFirstNegative &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved
- */
- CLTopKV2FindFirstNegative(CLTopKV2FindFirstNegative &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2FindFirstNegative to be moved
- * @return Reference of this instance
- */
- CLTopKV2FindFirstNegative &operator=(CLTopKV2FindFirstNegative &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] first_negative_idx_buf Buffer of the first negative index
- * @param[in] n Number of times to find
- * @return N/A
- */
- void configure(cl::Buffer *first_negative_idx_buf, int n);
-
- /**
- * @brief Set output buffer
- * @param[out] out_key_buf Buffer of output key
- * @return N/A
- */
- void setOutputBuffer(cl::Buffer *out_key_buf) { _out_key_buf = out_key_buf; }
-
- /**
- * @brief Run CLTopKV2FindFirstNegative op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- cl::Buffer *_out_key_buf;
-};
-
-/**
- * @brief Class to define CLTopKV2ReorderNegatives
- */
-class CLTopKV2ReorderNegatives : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLTopKV2ReorderNegatives();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied
- */
- CLTopKV2ReorderNegatives(const CLTopKV2ReorderNegatives &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2ReorderNegatives to be copied
- * @return Reference of this instance
- */
- CLTopKV2ReorderNegatives &operator=(const CLTopKV2ReorderNegatives &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved
- */
- CLTopKV2ReorderNegatives(CLTopKV2ReorderNegatives &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2ReorderNegatives to be moved
- * @return Reference of this instance
- */
- CLTopKV2ReorderNegatives &operator=(CLTopKV2ReorderNegatives &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] first_negative_idx_buf Buffer of the first negative index
- * @param[in] n Number of times to find
- * @return N/A
- */
- void configure(cl::Buffer *first_negative_idx_buf, int n);
-
- /**
- * @brief Set buffers
- * @param[in] in_key_buf Buffer of input key
- * @param[out] out_key_buf Buffer of output key
- * @param[in] in_ind_buf Buffer of input index
- * @param[out] out_ind_buf Buffer of output index
- * @return N/A
- */
- void setBuffers(cl::Buffer *in_key_buf, cl::Buffer *out_key_buf, cl::Buffer *in_ind_buf,
- cl::Buffer *out_ind_buf)
- {
- _in_key_buf = in_key_buf;
- _out_key_buf = out_key_buf;
- _in_ind_buf = in_ind_buf;
- _out_ind_buf = out_ind_buf;
- }
-
- /**
- * @brief Run CLTopKV2ReorderNegatives op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- cl::Buffer *_in_key_buf;
- cl::Buffer *_out_key_buf;
- cl::Buffer *_in_ind_buf;
- cl::Buffer *_out_ind_buf;
-};
-
-/**
- * @brief Class to define CLTopKV2Store
- */
-class CLTopKV2Store : public ICLKernel
-{
-public:
- /**
- * @brief Constructor
- */
- CLTopKV2Store();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied
- */
- CLTopKV2Store(const CLTopKV2Store &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLTopKV2Store to be copied
- * @return Reference of this instance
- */
- CLTopKV2Store &operator=(const CLTopKV2Store &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved
- */
- CLTopKV2Store(CLTopKV2Store &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLTopKV2Store to be moved
- * @return Reference of this instance
- */
- CLTopKV2Store &operator=(CLTopKV2Store &&) = default;
-
- /**
- * @brief Initialise kernel with params
- * @param[out] values Values tensor to store
- * @param[out] indices Indices tensor to be used for store
- * @param[in] k K of the top k predictions
- * @param[in] n Number of times to store
- * @return N/A
- */
- void configure(ICLTensor *values, ICLTensor *indices, int k, int n);
-
- /**
- * @brief Set buffers
- * @param[out] out_key_buf Buffer of output key
- * @param[out] out_ind_buf Buffer of output index
- * @return N/A
- */
- void setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf);
-
- /**
- * @brief Run CLTopKV2Store op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_values;
- ICLTensor *_indices;
- cl::Buffer *_out_key_buf;
- cl::Buffer *_out_ind_buf;
-};
-
-} // namespace arm_compute
-
-#endif // __ARM_COMPUTE_CLTOPKV2KERNEL_H__
diff --git a/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h
deleted file mode 100644
index f7bf72985..000000000
--- a/libs/ARMComputeEx/arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYEREXKERNEL_H__
-#define __ARM_COMPUTE_NENORMALIZATIONLAYEREXKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the normalization layer kernel.
- */
-class NENormalizationLayerExKernel : public INEKernel
-{
-public:
- const char *name() const override { return "NENormalizationLayerKernel"; }
- /** Default constructor */
- NENormalizationLayerExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENormalizationLayerExKernel(const NENormalizationLayerExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NENormalizationLayerExKernel &operator=(const NENormalizationLayerExKernel &) = delete;
- /** Default Move Constructor. */
- NENormalizationLayerExKernel(NENormalizationLayerExKernel &&) = default;
- /** Default move assignment operator */
- NENormalizationLayerExKernel &operator=(NENormalizationLayerExKernel &&) = default;
- /** Default destructor */
- ~NENormalizationLayerExKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types
- * supported: FP16/F32.
- * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a
- * single input with dimensions [width, height, IFM],
- * Data type supported: same as @p input
- * @param[out] output Destination tensor. Output will have the same number of dimensions as
- * input. Data type supported: same as @p input
- * @param[in] norm_info Normalization layer information like the normalization type,
- * normalization size and other parameters.
- */
- void configure(const ITensor *input, const ITensor *input_squared, ITensor *output,
- NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NENormalizationLayerKernel
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types
- * supported: FP16/F32.
- * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a
- * single input with dimensions [width, height, IFM],
- * Data type supported: same as @p input
- * @param[in] output Destination tensor. Output will have the same number of dimensions as
- * input. Data type supported: same as @p input
- * @param[in] norm_info Normalization layer information like the normalization type,
- * normalization size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared,
- const ITensorInfo *output, NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /** Function to perform normalization depending on the given template
- * dimension. The second template parameter specifies whether the
- * normalization has to be 1D or 2D.
- *
- * @note Only supported normalizations are:
- * - 1D over X or Z
- * - 2D over X and Y
- *
- * @param[in] window Region on which to execute the kernel.
- */
- template <DataType dt, unsigned int dim, bool do_2D_norm>
- void normalize_float(const Window &window);
-
- /** Common signature for all the specialised normalization functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using NormalizationFunctionEx = void (NENormalizationLayerExKernel::*)(const Window &window);
-
-private:
- NormalizationFunctionEx _func;
- const ITensor *_input;
- const ITensor *_input_squared;
- ITensor *_output;
- NormalizationLayerInfo _norm_info;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NENORMALIZATIONLAYEREXKERNEL_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/TypesEx.h b/libs/ARMComputeEx/arm_compute/core/TypesEx.h
deleted file mode 100644
index 8381f1cc6..000000000
--- a/libs/ARMComputeEx/arm_compute/core/TypesEx.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_TYPESEX_H__
-#define __ARM_COMPUTE_TYPESEX_H__
-
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <string>
-#include <utility>
-
-namespace arm_compute
-{
-
-/** Available ArgIndex operations **/
-enum class ArgOperation
-{
- MAX,
- MIN,
-};
-
-/** Available reduce operations */
-enum class ReduceOperation
-{
- MAX, /**< Max */
- MEAN, /**< Mean */
- SUM, /**< Sum */
- MIN, /**< Min */
-};
-
-/** Available binary logical operations */
-enum class BinaryLogicalOperation
-{
- AND, /**< AND */
- OR, /**< OR */
-};
-
-enum class ComparisonOperation
-{
- EQUAL, /**< EQUAL */
- NOT_EQUAL, /**< NOT_EQUAL */
-};
-
-/** Activation Layer Information class */
-class ActivationLayerInfoEx
-{
-public:
- /** Available activation functions */
- enum class ActivationFunction
- {
- RSQRT /**< Inverse Square root ( \f$ f(x) = \rsqrt{x} \f$ )*/
- };
-
- ActivationLayerInfoEx() = default;
- /** Default Constructor
- *
- * @param[in] f The activation function to use.
- * @param[in] a (Optional) The alpha parameter used by some activation functions
- * (@ref ActivationFunction::BOUNDED_RELU, @ref ActivationFunction::LU_BOUNDED_RELU,
- * @ref ActivationFunction::LINEAR, @ref ActivationFunction::TANH).
- * @param[in] b (Optional) The beta parameter used by some activation functions (@ref
- * ActivationFunction::LINEAR, @ref ActivationFunction::LU_BOUNDED_RELU, @ref
- * ActivationFunction::TANH).
- */
- ActivationLayerInfoEx(ActivationFunction f, float a = 0.0f, float b = 0.0f)
- : _act(f), _a(a), _b(b), _enabled(true)
- {
- }
- /** Get the type of activation function */
- ActivationFunction activation() const { return _act; }
- /** Get the alpha value */
- float a() const { return _a; }
- /** Get the beta value */
- float b() const { return _b; }
- /** Check if initialised */
- bool enabled() const { return _enabled; }
-
-private:
- ActivationFunction _act = {ActivationLayerInfoEx::ActivationFunction::RSQRT};
- float _a = {};
- float _b = {};
- bool _enabled = {false};
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_TYPESEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/core/UtilsEx.h b/libs/ARMComputeEx/arm_compute/core/UtilsEx.h
deleted file mode 100644
index 8dd68a0c3..000000000
--- a/libs/ARMComputeEx/arm_compute/core/UtilsEx.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_UTILSEX_H__
-#define __ARM_COMPUTE_UTILSEX_H__
-
-#include "arm_compute/core/TypesEx.h"
-
-#include <cstdint>
-#include <cstdlib>
-#include <sstream>
-#include <string>
-
-namespace arm_compute
-{
-/** Translates a given activation function to a string.
- *
- * @param[in] act @ref ActivationLayerInfo::ActivationFunction to be translated to string.
- *
- * @return The string describing the activation function.
- */
-const std::string &string_from_activation_func_ex(ActivationLayerInfoEx::ActivationFunction act);
-}
-#endif /*__ARM_COMPUTE_UTILSEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h
deleted file mode 100644
index 7e578550f..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLActivationLayerEx.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__
-#define __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLActivationLayerExKernel
- *
- * @note The function simulates an activation layer with the specified activation function.
- */
-class CLActivationLayerEx : public ICLSimpleFunction
-{
-public:
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr or is equal to the input, the activation function will
- * be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will
- * store the result
- * of the activation function. Data types supported:
- * QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer parameters.
- */
- void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfoEx act_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLActivationLayer
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor
- * will store the result
- * of the activation function. Data types supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfoEx &act_info);
-};
-}
-#endif /* __ARM_COMPUTE_CLACTIVATIONLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h
deleted file mode 100644
index 8044c58af..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArgMinMax.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLArgMinMax.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLArgMinMax class
- */
-
-#ifndef __ARM_COMPUTE_CLARG_MIN_MAX_H__
-#define __ARM_COMPUTE_CLARG_MIN_MAX_H__
-
-#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to execute CLArgMinMax operation
- */
-class CLArgMinMax : public IFunction
-{
-public:
- /**
- * @brief Construct a new CLArgMinMax object
- */
- CLArgMinMax();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgMinMax(const CLArgMinMax &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgMinMax &operator=(const CLArgMinMax &) = delete;
-
- /**
- * @brief Construct a new CLArgMinMax object by using copy constructor
- * @param[in] CLArgMinMax object to move
- */
- CLArgMinMax(CLArgMinMax &&) = default;
-
- /**
- * @brief Assign a CLArgMinMax object.
- * @param[in] CLArgMinMax object to assign. This object will be moved.
- */
- CLArgMinMax &operator=(CLArgMinMax &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and outputs.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The result of argminmaxMax operation. Data types supported: same as @p
- * input.
- * @param[in] axis Axis to argminmax. It must be sorted and no duplicates.
- * @param[in] is_min True for ArgMin operation.
- * @param[in] is_max Ture for ArgMax operation.
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> argminmax_axis,
- ArgOperation op);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] axis Axis to argminmax
- * @param[out] output The result of argminmaxMax operation. Data types supported: same as @p
- * input.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis,
- const ITensorInfo *output, ArgOperation op);
-
- /**
- * @brief Run the kernels contained in the function
- * This operation works on CPU on GPU depending on the value of argminmax_MAX_RUN_ON_CPU macro
- * in CLArgMinMax.cpp.
- * If argminmax_MAX_RUN_ON_CPU == 1, CPU runs this operation.
- * Otherwise GPU runs this operation.
- * @return N/A
- */
- void run() override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- std::vector<uint32_t> _argminmax_axis;
- ArgOperation _arg_op;
-
- std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
- std::unique_ptr<CLArgMinMaxKernel[]> _argminmax_kernels{nullptr};
- size_t _num_of_kernels;
-};
-}
-#endif /*__ARM_COMPUTE_CLargminmax_MAX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h
deleted file mode 100644
index 34e6c6334..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__
-#define __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLArithmeticSubtractionExKernel
- *
- * @note The tensor data type for the inputs must be U8/S16/F16/F32.
- * @note The function performs an arithmetic subtraction between two tensors.
- */
-class CLArithmeticSubtractionEx : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's inputs, output and convertion policy.
- *
- * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32.
- * The input tensor is [in, out] because its TensorInfo might be modified
- * inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be modified
- * inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8),
- * S16/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLArithmeticSubtractionEx
- *
- * @param[in] input1 First tensor input info. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8/S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8),
- * S16/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, ConvertPolicy policy);
-};
-}
-#endif /* __ARM_COMPUTE_CLARITHMETICSUBTRACTIONEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
deleted file mode 100644
index d16a0762d..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBatchToSpaceND.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLBatchToSpaceNDKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLBatchToSpaceND : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size A pointer to an array of integer values specifying block sizes
- * for spatial dimension.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
deleted file mode 100644
index 061e34f26..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLBINARYLOGICALOP_H__
-#define __ARM_COMPUTE_CLBINARYLOGICALOP_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLBinaryLogicalOp : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input1 Source tensor1. Data types supported: U8, QASYMM8.
- * @param[in] input2 Source tensor2. Data types supported: U8 QASYMM8.
- * @param[out] output Output tensor. Data types supported: U8, QASYMM8.
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- BinaryLogicalOperation op);
-};
-
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLBINARYLOGICALOP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
deleted file mode 100644
index 56b8408e2..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLCast.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLCast.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLCast class
- */
-
-#ifndef __ARM_COMPUTE_CLCAST_H__
-#define __ARM_COMPUTE_CLCAST_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLCastKernel.
- * This converts the input tensor to the tensor of the output tensor's type.
- */
-class CLCast : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's input and output
- * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(ICLTensor *input, ICLTensor *output);
-};
-}
-#endif /* __ARM_COMPUTE_CLCAST_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h
deleted file mode 100644
index 1b0d70e7f..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLComparisonOp.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLCOMPARISON_OP_H__
-#define __ARM_COMPUTE_CLCOMPARISON_OP_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLComparisonOp : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input1 Source tensor1. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] input2 Source tensor2. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- const ComparisonOperation &op);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLCOMPARISON_OP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
deleted file mode 100644
index d78a6ada4..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLDepthToSpace.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLDepthToSpaceKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLDepthToSpace : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-} // namesace arm_compute
-
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
deleted file mode 100644
index 257772a89..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLEmbeddingLookup.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLEmbeddingLookup.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLEmbeddingLookup class
- */
-
-#ifndef __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
-#define __ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to perform EmbeddingLookup operation
- */
-class CLEmbeddingLookup : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Set the input and output tensors.
- * @param[in] input Source tensor.
- * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
- * input.
- * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
- * input.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *lookups);
-};
-}
-#endif /*__ARM_COMPUTE_CLEMBEDDINGLOOKUP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h
deleted file mode 100644
index 2d0fc23a4..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLExp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLEXP_H__
-#define __ARM_COMPUTE_CLEXP_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLExpKernel */
-class CLExp : public ICLSimpleFunction
-{
-public:
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: F32.
- * @param[out] output Destination tensor. Data type supported: F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-};
-}
-#endif /* __ARM_COMPUTE_CLEXP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
deleted file mode 100644
index f7fd3cda1..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLGather.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLGather.h
- * @brief This file contains CLGather class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLGATHER_H__
-#define __ARM_COMPUTE_CLGATHER_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLGatherKernel.
- */
-class CLGather : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs and output.
- * @param[in] input1 An input tensor. Data types supported: U8/S32/F32.
- * @param[in] input2 An indexes tensor. Data types supported: S32.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- * @return N/A
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration
- * of @ref CLGather
- * @param[in] input1 An input tensor info. Data types supported: U8/S32/F32.
- * @param[in] input2 An indexes tensor info. Data types supported: S32.
- * @param[in] output The output tensor info. Data types supported: same as @p input1.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output);
-};
-}
-#endif /*__ARM_COMPUTE_CLGATHER_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
deleted file mode 100644
index 65aa6cbd5..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLHashtableLookup.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLHashtableLookup.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLHashtableLookup class
- */
-
-#ifndef __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
-#define __ARM_COMPUTE_CLHASHTABLELOOKUP_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to perform HashtableLookup operation
- */
-class CLHashtableLookup : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Set the input and output tensors.
- * @param[in] lookups Lookups 1D tensor that values are indices into the first dimension of
- * input.
- * @param[in] keys Keys 1D tensor. keys and input pair represent a map.
- * Data types supported: S32
- * @param[in] input Source tensor.
- * Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
- * input.
- * @param[out] hits Hits 1D tensor. A boolean tensor that indicates whether the lookup hits
- * (True) or not (False). Data types supported: U8/QASYMM8
- * @return N/A
- */
- void configure(const ICLTensor *lookups, const ICLTensor *keys, const ICLTensor *intput,
- ICLTensor *output, ICLTensor *hits);
-};
-}
-#endif /*__ARM_COMPUTE_CLHASHTABLELOOKUP_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
deleted file mode 100644
index 198a0fd4e..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNeg.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLNEG_H__
-#define __ARM_COMPUTE_CLNEG_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLNeg : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input Source tensor. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(ICLTensor *input, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLNEG_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h
deleted file mode 100644
index 4077245d5..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
-#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to compute a normalization layer. This function calls the following CL kernels:
- *
- * -# @ref CLFillBorderKernel
- * -# @ref CLNormalizationLayerExKernel
- *
- */
-class CLNormalizationLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- CLNormalizationLayerEx();
- /** Set the input and output tensors.
- *
- * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types
- * supported: F16/F32 (Written to by the border handler)
- * @param[out] output Destination tensor. Dimensions, data type and number of channels must
- * match the input ones.
- * @param[in] norm_info Normalization layer information like the normalization type,
- * normalization size and other parameters.
- */
- void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLNormalizationLayerEx
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported:
- * F16/F32
- * @param[in] output Destination tensor. Dimensions, data type and number of channels must
- * match the input ones.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization
- * size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const NormalizationLayerInfo &norm_info);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLNormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
-};
-}
-#endif /* __ARM_COMPUTE_CLNORMALIZATIONLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
deleted file mode 100644
index 622a61b5e..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPReLU.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLPRELU_H__
-#define __ARM_COMPUTE_CLPRELU_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLPReLU : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input Input tensor. Data types supported:
- * QASYMM8/F16/F32.
- * @param[in] alpha Alpha tensor. Data types supported:
- * QASYMM8/F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_H__*/
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
deleted file mode 100644
index d6ea486d1..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPadLayerEx.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYEREX_H__
-#define __ARM_COMPUTE_CLPADLAYEREX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLPadLayerKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLPadLayerEx : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported:
- * U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported:
- * U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] pad_size Tensor for Padding values in NHWC format shape [n, 2],
- * where n is the rank of the tensor. Data types supported: S32
- */
- void configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h
deleted file mode 100644
index 9a0cc213c..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPermuteEx.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLPERMUTEEX_H__
-#define __ARM_COMPUTE_CLPERMUTEEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute a @ref CLPermuteExKernel. */
-class CLPermuteEx : public ICLSimpleFunction
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input The input tensor to permute. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteEx.
- *
- * @param[in] input First tensor input info. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm);
-};
-}
-#endif /*__ARM_COMPUTE_CLPERMUTEEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
deleted file mode 100644
index b142d3a2e..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLPixelWiseDivision.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLPixelWiseDivision.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLPixelWiseDivision class
- */
-#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLPixelWiseDivisionKernel.
- */
-class CLPixelWiseDivision : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs, output and conversion policy.
- * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or
- * 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
- * even.
- * @return N/A
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivision
- * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-};
-}
-#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
deleted file mode 100644
index e1a6f6ab4..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLReduceOperation.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLReduceOperation class
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCEOPERATION_H__
-#define __ARM_COMPUTE_CLREDUCEOPERATION_H__
-
-#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
-#include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to perform ReduceOperation
- */
-class CLReduceOperation : public IFunction
-{
-public:
- /**
- * @brief Construct a new ReduceOperation object
- */
- CLReduceOperation();
-
- /**
- * @brief Set the input and output tensors.
- * @param[in] input Source tensor. Data types supported: U8/S32/F32
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p
- * input.
- * @param[in] axis Axes along which to reduce. They must be sorted and contain no duplicates.
- * @param[in] op Reduce operation to perform.
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *output, const std::set<uint32_t> &axis,
- ReduceOperation op);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLReduceOperation.
- * @param[in] input Source tensor info. Data types supported: U8/S32/F32
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
- * input.
- * @param[in] axis Axes along which to reduce. They must be sorted and contain no duplicates.
- * @param[in] op Reduce operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const std::set<uint32_t> &axis, const ReduceOperation &op);
-
- /**
- * @brief Run the OpenCL kernel for this operation
- * @return N/A
- */
- void run() override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- std::set<uint32_t> _axis;
-
- std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
- std::unique_ptr<CLReduceOperationKernel[]> _reduce_kernels{nullptr};
-};
-}
-#endif /*__ARM_COMPUTE_CLREDUCEOPERATION_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
deleted file mode 100644
index 7e2df8986..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToBatchND.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
-#define __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLSpaceToBatchNDKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/F16/S32/F32.
- * @note The function divides "spatial" dimensions of the input into a grid of blocks of shape
- * block_shape, and interleaves these blocks with the "batch" dimension in the output.
- */
-class CLSpaceToBatchND : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @note The data layout of input and output must be the same.
- * @note The number of dimensions of input and output must be 4, and `spatial` dimensions
- * are height and width.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/F16/S32/F32.
- * Data layout supported: NCHW/NHWC
- * @param[in] block_size Tensor of integer values specifying block sizes for spatial
- * dimension.
- * Data types supported: S32
- * @param[in] padding_size Tensor of integer values specifying padding sizes for spatial
- * dimension.
- * Data types supported: S32
- * @param[out] output Output tensor. Data types supported: same as @p input.
- * Data layout supported: NCHW/NHWC
- */
- void configure(const ICLTensor *input, const ICLTensor *block_size, const ICLTensor *padding_size,
- ICLTensor *output);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
deleted file mode 100644
index 17f762092..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSpaceToDepth.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
-#define __ARM_COMPUTE_CLSPACETODEPTH_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLSpaceToDepthKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLSpaceToDepth : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size Block size, integer only.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h
deleted file mode 100644
index 3610ba71c..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLSquaredDifference.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__
-#define __ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLSquaredDifference : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input1 Source tensor1. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] input2 Source tensor2. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLSQUARED_DIFFERENCE_H__*/
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
deleted file mode 100644
index 6b26a85c8..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLStridedSliceEx.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLStridedSliceEx.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLStridedSliceEx class
- */
-
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLStridedSliceKernel
- */
-class CLStridedSliceEx : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs and outputs
- * @param[in] input Tensor input. Data type supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] beginData 'begin' vector of strided slice operation
- * @param[in] endData 'end' vector of strided slice operation
- * @param[in] stridesData 'strides' vector of strided slice operation
- * @param[in] beginMask If the ith bit is set, begin[i] is ignored
- * @param[in] endMask If the ith bit is set, end[i] is ignored
- * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the
- * dimensionality by 1, taking on the value at index begin[i]
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-};
-}
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */
diff --git a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h b/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
deleted file mode 100644
index 5327e016f..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/CL/functions/CLTopKV2.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLTopKV2.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLTopKV2 class
- */
-#ifndef __ARM_COMPUTE_CLTOPK_V2_H__
-#define __ARM_COMPUTE_CLTOPK_V2_H__
-
-#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
-
-#include "arm_compute/runtime/IFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to execute TopKV2 operation.
- */
-class CLTopKV2 : public IFunction
-{
-public:
- /**
- * @brief Construct a new CLTopKV2 object
- */
- CLTopKV2();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLTopKV2(const CLTopKV2 &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLTopKV2 &operator=(const CLTopKV2 &) = delete;
-
- /**
- * @brief Construct a new CLTopKV2 object by using copy constructor
- * @param[in] CLTopKV2 object to move
- */
- CLTopKV2(CLTopKV2 &&) = default;
-
- /**
- * @brief Assign a CLTopKV2 object.
- * @param[in] CLTopKV2 object to assign. This object will be moved.
- */
- CLTopKV2 &operator=(CLTopKV2 &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and outputs.
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[in] k The value of `k`.
- * @param[out] values Top k values. Data types supported: S32 if input type is U8/S16, F32 if
- * input type is F32.
- * @param[out] indices Indices related to top k values. Data types supported: S32 if input type
- * is U8/S16, F32 if input type is F32.
- * @return N/A
- */
- void configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices,
- int total_bits = 32, int bits = 4);
-
- /**
- * @brief Run the kernels contained in the function
- * Depending on the value of the following environment variables it works differently:
- * - If the value of environment variable "ACL_TOPKV2" == "GPU_SINGLE",
- * quick sort on GPU is used.
- * - If the value of environment variable "ACL_TOPKV2" == ""GPU"",
- * radix sort on GPU is used.
- * - For other value, TopKV2 runs on CPU
- * @return N/A
- */
- void run() override;
-
-private:
- void run_on_cpu();
- void run_on_gpu();
- void run_on_gpu_single_quicksort();
-
- uint32_t _k;
- uint32_t _total_bits;
- uint32_t _bits;
- uint32_t _radix;
- uint32_t _hist_buf_size;
- uint32_t _glob_sum_buf_size;
- uint32_t _n;
-
- ICLTensor *_input;
- ICLTensor *_values;
- ICLTensor *_indices;
-
- cl::Buffer _qs_idx_buf;
- cl::Buffer _qs_temp_buf;
- cl::Buffer _hist_buf;
- cl::Buffer _glob_sum_buf;
- cl::Buffer _temp_buf;
- cl::Buffer _first_negative_idx_buf;
- cl::Buffer _in_key_buf;
- cl::Buffer _out_key_buf;
- cl::Buffer _in_ind_buf;
- cl::Buffer _out_ind_buf;
-
- cl::Buffer *_p_in_key_buf;
- cl::Buffer *_p_out_key_buf;
- cl::Buffer *_p_in_ind_buf;
- cl::Buffer *_p_out_ind_buf;
-
- CLTopKV2Single _qs_kernel;
- CLTopKV2Init _init_kernel;
- CLRadixSortHistogram _hist_kernel;
- CLRadixSortScanHistogram _scan_hist_kernel;
- CLRadixSortGlobalScanHistogram _glob_scan_hist_kernel;
- CLRadixSortPasteHistogram _paste_hist_kernel;
- CLRadixSortReorder _reorder_kernel;
- CLTopKV2FindFirstNegative _find_first_negative_kernel;
- CLTopKV2ReorderNegatives _reorder_negatives_kernel;
- CLTopKV2Store _store_kernel;
-};
-}
-#endif // __ARM_COMPUTE_CLTOPK_V2_H__
diff --git a/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h b/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h
deleted file mode 100644
index fa7408ecd..000000000
--- a/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_NENORMALIZATIONLAYEREX_H__
-#define __ARM_COMPUTE_NENORMALIZATIONLAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h"
-#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to compute a normalization layer. This function calls the following NEON kernels:
- *
- * -# @ref NEPixelWiseMultiplicationKernel
- * -# @ref NEFillBorderKernel
- * -# @ref NENormalizationLayerKernelEx
- *
- */
-class NENormalizationLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NENormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data type supported:
- * F16/F32
- * @param[out] output Destination with the same dimensions, data type and number of channels of
- * @p input
- * @param[in] norm_info Normalization layer information like the normalization type,
- * normalization size and other parameters.
- */
- void configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NENormalizationLayer
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data type supported:
- * F16/F32
- * @param[in] output Destination with the same dimensions, data type and number of channels of
- * @p input
- * @param[in] norm_info Normalization layer information like the normalization type, normalization
- * size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const NormalizationLayerInfo &norm_info);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group; /**< Function memory group */
- NENormalizationLayerExKernel _norm_kernel; /**< Normalization layer kernel */
- NEPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel */
- NEFillBorderKernel _border_handler; /**< Kernel to handle borders */
- Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */
-};
-}
-#endif /* __ARM_COMPUTE_NENORMALIZATIONLAYEREX_H__ */
diff --git a/libs/ARMComputeEx/resolve_includes.py b/libs/ARMComputeEx/resolve_includes.py
deleted file mode 100644
index b3e252892..000000000
--- a/libs/ARMComputeEx/resolve_includes.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-# Copyright (c) 2016, 2017 ARM Limited.
-#
-# SPDX-License-Identifier: MIT
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-import collections
-import os.path
-import re
-import subprocess
-import glob
-
-
-def resolve_includes(target, source):
- # File collection
- FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents')
-
- # Include pattern
- pattern = re.compile("#include \"(.*)\"")
-
- # Get file contents
- files = []
- for i in range(len(source)):
- src = source[i]
- dst = target[i]
- f = open(src)
- cts = f.read()
- f.close()
- contents = cts.splitlines()
- entry = FileEntry(target_name=dst, file_contents=contents)
- files.append((os.path.basename(src), entry))
-
- # Create dictionary of tupled list
- files_dict = dict(files)
-
- # Check for includes (can only be files in the same folder)
- final_files = []
- for file in files:
- done = False
- tmp_file = file[1].file_contents
- print(file[1].target_name)
- while not done:
- file_count = 0
- updated_file = []
- for line in tmp_file:
- found = pattern.search(line)
- if found:
- include_file = found.group(1)
- data = files_dict[include_file].file_contents
- updated_file.extend(data)
- else:
- updated_file.append(line)
- file_count += 1
-
- # Check if all include are replaced.
- if file_count == len(tmp_file):
- done = True
-
- # Update temp file
- tmp_file = updated_file
-
- # Append and prepend string literal identifiers and add expanded file to final list
- tmp_file.insert(0, "R\"(\n")
- tmp_file.append("\n)\"")
- entry = FileEntry(target_name=file[1].target_name, file_contents=tmp_file)
- final_files.append((file[0], entry))
-
- # Write output files
- for file in final_files:
- with open(file[1].target_name, 'w+') as out_file:
- out_file.write("\n".join(file[1].file_contents))
-
-
-# Generate embed files
-cl_files = glob.glob('src/core/CL/cl_kernels/*.cl')
-cl_files += glob.glob('src/core/CL/cl_kernels/*.h')
-
-# DEBUG: print cl files
-print("cl_files:")
-print(cl_files)
-
-embed_files = [f + "embed" for f in cl_files]
-print("embed_files:")
-print(embed_files)
-
-resolve_includes(embed_files, cl_files)
diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
deleted file mode 100644
index 05ecdeb22..000000000
--- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Utils.h"
-
-#include <algorithm>
-#include <fstream>
-#include <iostream>
-#include <utility>
-#include <vector>
-
-using namespace arm_compute;
-
-const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
- // ARMComputeEx kernels
- {"activation_layer_ex", "activation_layer_ex.cl"},
- {"arg_op", "arg_operation.cl"},
- {"arithmetic_sub_ex", "arithmetic_op_ex.cl"},
- {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"},
- {"batch_to_space_nd", "batch_to_space_nd.cl"},
- {"binary_logical_op", "binary_logical_op.cl"},
- {"cast", "cast.cl"},
- {"cast_qasymm_in", "cast.cl"},
- {"cast_qasymm_out", "cast.cl"},
- {"comparison_op", "comparison_op.cl"},
- {"comparison_op_qasymm8", "comparison_op_quantized.cl"},
- {"depth_to_space", "depth_to_space.cl"},
- {"embedding_lookup", "embedding_lookup.cl"},
- {"exp_layer", "exp.cl"},
- {"gather", "gather.cl"},
- {"gather_1d", "gather.cl"},
- {"gather_1d_out", "gather.cl"},
- {"hashtable_lookup", "hashtable_lookup.cl"},
- {"neg_tensor", "neg_tensor.cl"},
- {"pad", "pad.cl"},
- {"permute_generic", "permute_ex.cl"},
- {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
- {"pixelwise_div_float", "pixelwise_div_float.cl"},
- {"pixelwise_div_int", "pixelwise_div_int.cl"},
- {"prelu", "prelu.cl"},
- {"prelu_qasymm8", "prelu_quantized.cl"},
- {"reduce_min_max", "reduce_operation.cl"},
- {"reduce_sum_mean", "reduce_operation.cl"},
- {"squared_difference", "squared_difference.cl"},
- {"strided_slice_ex", "strided_slice_ex.cl"},
- {"topkv2_init", "topkv2.cl"},
- {"topkv2_find_first_negative", "topkv2.cl"},
- {"topkv2_reorder_negatives", "topkv2.cl"},
- {"topkv2_store", "topkv2.cl"},
- {"radixsort_histogram", "topkv2_radixsort.cl"},
- {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
- {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
- {"radixsort_reorder", "topkv2_radixsort.cl"},
- {"topkv2_quicksort", "topkv2_quicksort.cl"},
- {"space_to_batch_4d_nchw", "space_to_batch.cl"},
- {"space_to_batch_4d_nhwc", "space_to_batch.cl"},
- {"space_to_depth", "space_to_depth.cl"},
-};
-
-const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
-#ifdef EMBEDDED_KERNELS
- {
- "activation_layer_ex.cl",
-#include "./cl_kernels/activation_layer_ex.clembed"
- },
- {
- "arg_operation.cl",
-#include "./cl_kernels/arg_operation.clembed"
- },
- {
- "arithmetic_op_ex.cl",
-#include "./cl_kernels/arithmetic_op_ex.clembed"
- },
- {
- "batch_to_space_nd.cl",
-#include "./cl_kernels/batch_to_space_nd.clembed"
- },
- {
- "cast.cl",
-#include "./cl_kernels/cast.clembed"
- },
- {
- "comparison_op.cl",
-#include "./cl_kernels/comparison_op.clembed"
- },
- {
- "comparison_op_quantized.cl",
-#include "./cl_kernels/comparison_op_quantized.clembed"
- },
- {
- "embedding_lookup.cl",
-#include "./cl_kernels/embedding_lookup.clembed"
- },
- {
- "depth_to_space.cl",
-#include "./cl_kernels/depth_to_space.clembed"
- },
- {
- "exp.cl",
-#include "./cl_kernels/exp.clembed"
- },
- {
- "gather.cl",
-#include "./cl_kernels/gather.clembed"
- },
- {
- "hashtable_lookup.cl",
-#include "./cl_kernels/hashtable_lookup.clembed"
- },
- {
- "helpers.h",
-#include "./cl_kernels/helpers.hembed"
- },
- {
- "helpers_asymm.h",
-#include "./cl_kernels/helpers_asymm.hembed"
- },
- {
- "binary_logical_op.cl",
-#include "./cl_kernels/binary_logical_op.clembed"
- },
- {
- "neg_tensor.cl",
-#include "./cl_kernels/neg_tensor.clembed"
- },
- {
- "pad.cl",
-#include "./cl_kernels/pad.clembed"
- },
- {
- "pixelwise_div_float.cl",
-#include "./cl_kernels/pixelwise_div_float.clembed"
- },
- {
- "pixelwise_div_int.cl",
-#include "./cl_kernels/pixelwise_div_int.clembed"
- },
- {
- "prelu.cl",
-#include "./cl_kernels/prelu.clembed"
- },
- {
- "prelu_quantized.cl",
-#include "./cl_kernels/prelu_quantized.clembed"
- },
- {
- "reduce_operation.cl",
-#include "./cl_kernels/reduce_operation.clembed"
- },
- {
- "space_to_batch.cl",
-#include "./cl_kernels/space_to_batch.clembed"
- },
- {
- "space_to_depth.cl",
-#include "./cl_kernels/space_to_depth.clembed"
- },
- {
- "squared_difference.cl",
-#include "./cl_kernels/squared_difference.clembed"
- },
- {
- "strided_slice_ex.cl",
-#include "./cl_kernels/strided_slice_ex.clembed"
- },
- {
- "topkv2.cl",
-#include "./cl_kernels/topkv2.clembed"
- },
- {
- "topkv2_radixsort.cl",
-#include "./cl_kernels/topkv2_radixsort.clembed"
- },
- {
- "topkv2_quicksort.cl",
-#include "./cl_kernels/topkv2_quicksort.clembed"
- },
- {
- "permute_ex.cl",
-#include "./cl_kernels/permute_ex.clembed"
- },
-
-#endif /* EMBEDDED_KERNELS */
-};
-
-CLKernelLibraryEx::CLKernelLibraryEx()
- : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
-{
- opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
- // CLKernelLibraryEx is built
-}
-
-CLKernelLibraryEx &CLKernelLibraryEx::get()
-{
- static CLKernelLibraryEx _kernel_library;
- return _kernel_library;
-}
-
-Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
- const StringSet &build_options_set) const
-{
- // Find which program contains the kernel
- auto kernel_program_it = _kernel_program_map.find(kernel_name);
-
- if (_kernel_program_map.end() == kernel_program_it)
- {
- ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
- }
- std::string concat_str;
-
- if (fp16_supported())
- {
- concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
- }
-
- if (get_cl_version(_device) == CLVersion::CL20)
- {
- concat_str += " -cl-std=CL2.0 ";
- }
- else if (arm_non_uniform_workgroup_supported(_device))
- {
- concat_str += " -cl-arm-non-uniform-work-group-size ";
- }
- else
- {
- ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
- }
-
- // Check if the program has been built before with same build options.
- const std::string program_name = kernel_program_it->second;
- const std::string build_options = stringify_set(build_options_set) + concat_str;
-
- const std::string built_program_name = program_name + "_" + build_options;
- auto built_program_it = _built_programs_map.find(built_program_name);
-
- cl::Program cl_program;
-
- if (_built_programs_map.end() != built_program_it)
- {
- // If program has been built, retrieve to create kernel from it
- cl_program = built_program_it->second;
- }
- else
- {
- // Get program
- Program program = load_program(program_name);
-
- // Build program
- cl_program = program.build(build_options);
-
- // Add built program to internal map
- _built_programs_map.emplace(built_program_name, cl_program);
- }
-
- // Create and return kernel
- return Kernel(kernel_name, cl_program);
-}
-
-void CLKernelLibraryEx::add_built_program(const std::string &built_program_name,
- cl::Program program)
-{
- _built_programs_map.emplace(built_program_name, program);
-}
-
-bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); }
-
-bool CLKernelLibraryEx::int64_base_atomics_supported() const
-{
- return device_supports_extension(_device, "cl_khr_int64_base_atomics");
-}
-
-const Program &CLKernelLibraryEx::load_program(const std::string &program_name) const
-{
- const auto program_it = _programs_map.find(program_name);
-
- if (program_it != _programs_map.end())
- {
- return program_it->second;
- }
-
- Program program;
-
-#ifdef EMBEDDED_KERNELS
- const auto program_source_it = _program_source_map.find(program_name);
-
- if (_program_source_map.end() == program_source_it)
- {
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
- }
-
- program = Program(_context, program_name, program_source_it->second);
-#else /* EMBEDDED_KERNELS */
- // Check for binary
- std::string source_name = _kernel_path + program_name;
- std::string binary_name = source_name + "bin";
-
- if (std::ifstream(binary_name).is_open())
- {
- const std::string program_binary = read_file(binary_name, true);
- program = Program(_context, _device, program_name,
- std::vector<unsigned char>(program_binary.begin(), program_binary.end()));
- }
- else if (std::ifstream(source_name).is_open())
- {
- program = Program(_context, program_name, read_file(source_name, false));
- }
- else
- {
- ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str());
- }
-#endif /* EMBEDDED_KERNELS */
-
- // Insert program to program map
- const auto new_program = _programs_map.emplace(program_name, std::move(program));
-
- return new_program.first->second;
-}
-
-std::string CLKernelLibraryEx::stringify_set(const StringSet &s) const
-{
- std::string concat_set;
-
-#ifndef EMBEDDED_KERNELS
- concat_set += "-I" + _kernel_path + " ";
-#endif /* EMBEDDED_KERNELS */
-
- // Concatenate set
- for (const auto &el : s)
- {
- concat_set += " " + el;
- }
-
- return concat_set;
-}
-
-std::string CLKernelLibraryEx::get_program_source(const std::string &program_name)
-{
- const auto program_source_it = _program_source_map.find(program_name);
-
- if (program_source_it == _program_source_map.end())
- {
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
- }
-
- return program_source_it->second;
-}
-
-size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) const
-{
- size_t result;
-
- size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result);
- ARM_COMPUTE_ERROR_ON_MSG(
- err != 0,
- "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
- ARM_COMPUTE_UNUSED(err);
-
- return result;
-}
-
-cl::NDRange CLKernelLibraryEx::default_ndrange() const
-{
- // GPUTarget _target = get_target_from_device(_device);
- cl::Device device = cl::Device::getDefault();
- GPUTarget _target = get_target_from_device(device);
- cl::NDRange default_range;
-
- switch (_target)
- {
- case GPUTarget::MIDGARD:
- case GPUTarget::T600:
- case GPUTarget::T700:
- case GPUTarget::T800:
- default_range = cl::NDRange(128u, 1);
- break;
- default:
- default_range = cl::NullRange;
- }
-
- return default_range;
-}
-
-std::string CLKernelLibraryEx::get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); }
diff --git a/libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp b/libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp
deleted file mode 100644
index cbda169fb..000000000
--- a/libs/ARMComputeEx/src/core/CL/OpenCLEx.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/OpenCLEx.h"
-
-#include <dlfcn.h>
-#include <iostream>
-
-namespace arm_compute
-{
-CLSymbolsEx &CLSymbolsEx::get()
-{
- static CLSymbolsEx symbols;
- return symbols;
-}
-
-bool CLSymbolsEx::load_default()
-{
- static const std::vector<std::string> libraries{"libOpenCL.so", "libGLES_mali.so", "libmali.so"};
-
- if (_loaded.first)
- {
- return _loaded.second;
- }
-
- // Indicate that default loading has been tried
- _loaded.first = true;
-
- for (const auto &lib : libraries)
- {
- if (load(lib))
- {
- return true;
- }
- }
-
- std::cerr << "Couldn't find any OpenCL library.\n";
- return false;
-}
-
-bool CLSymbolsEx::load(const std::string &library)
-{
- void *handle = dlopen(library.c_str(), RTLD_LAZY | RTLD_LOCAL);
-
- if (handle == nullptr)
- {
- std::cerr << "Can't load " << library << ": " << dlerror() << "\n";
- // Set status of loading to failed
- _loaded.second = false;
- return false;
- }
-
-#define LOAD_FUNCTION_PTR(func_name, handle) \
- func_name##_ptr = reinterpret_cast<decltype(func_name) *>(dlsym(handle, #func_name));
-
- LOAD_FUNCTION_PTR(clGetEventInfo, handle);
- LOAD_FUNCTION_PTR(clSetEventCallback, handle);
-
-#undef LOAD_FUNCTION_PTR
-
- // Don't call dlclose(handle) or all the symbols will be unloaded !
-
- // Disable default loading and set status to successful
- _loaded = std::make_pair(true, true);
-
- return true;
-}
-
-} // namespace arm_compute
-
-cl_int clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size,
- void *param_value, size_t *param_value_size_ret)
-{
- arm_compute::CLSymbolsEx::get().load_default();
- auto func = arm_compute::CLSymbolsEx::get().clGetEventInfo_ptr;
- if (func != nullptr)
- {
- return func(event, param_name, param_value_size, param_value, param_value_size_ret);
- }
- else
- {
- return CL_OUT_OF_RESOURCES;
- }
-}
-
-cl_int clSetEventCallback(cl_event event, cl_int command_exec_callback_type,
- void(CL_CALLBACK *pfn_ev_notify)(cl_event ev, cl_int ev_cmd_exec_status,
- void *user_data),
- void *user_data)
-{
- arm_compute::CLSymbolsEx::get().load_default();
- auto func = arm_compute::CLSymbolsEx::get().clSetEventCallback_ptr;
- if (func != nullptr)
- {
- return func(event, command_exec_callback_type, pfn_ev_notify, user_data);
- }
- else
- {
- return CL_OUT_OF_RESOURCES;
- }
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl
deleted file mode 100644
index f54c7bde3..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/activation_layer_ex.cl
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
-
-#define CONST_ONE 1.f
-#define DIV_OP(a, b) ((a) / (b))
-#define RSQRT_OP(a) DIV_OP(CONST_ONE, sqrt((a)))
-
-// Inverse Square-root Activation
-inline TYPE rsqrt_op(TYPE x)
-{
- return RSQRT_OP(x);
-}
-
-#define ACTIVATION_OP2(op, x) op##_op(x)
-#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x)
-
-#if defined(ACT)
-
-/** This performs an activation function floating point inputs.
- *
- * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
- * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void activation_layer_ex(
- TENSOR3D_DECLARATION(input)
-#ifndef IN_PLACE
- ,
- TENSOR3D_DECLARATION(output)
-#endif /* not IN_PLACE */
-)
-{
- // Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
-#ifdef IN_PLACE
- Tensor3D output = input;
-#else /* IN_PLACE */
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-#endif /* IN_PLACE */
-
- // Load data
- TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
-
- // Perform activation
- data = ACTIVATION_OP(ACT, data);
-
- // Store result
- VSTORE(VEC_SIZE)
- (data, 0, (__global DATA_TYPE *)output.ptr);
-}
-
-#endif /* defined(ACT) */
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
deleted file mode 100644
index 9a6921d7c..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
-/** Perform arg_max/arg_min
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using
- * -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination image. Supported data types: U32
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] axis Axis through which reduction occurs for max value index
- * @param[in] dim Dimension across the axis to be reduced.
- */
-
-__kernel void arg_op(TENSOR4D_DECLARATION(input),
-                     TENSOR4D_DECLARATION(output),
-                     const int axis,
-                     const int dim)
-{
-    Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
-    Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
-    // 4D coordinate of this work-item; global id 2 is split by DEPTH_OUT
-    // into the third and fourth coordinates.
-    int coord[4];
-    coord[0] = get_global_id(0);
-    coord[1] = get_global_id(1);
-    coord[2] = get_global_id(2) % DEPTH_OUT;
-    coord[3] = get_global_id(2) / DEPTH_OUT;
-
-    // Seed the running extreme with the element at the starting coordinate.
-    DATA_TYPE best      = *((__global DATA_TYPE *)tensor4D_offset(&src, coord[0], coord[1], coord[2], coord[3]));
-    DATA_TYPE prev_best = best;
-    int       best_pos  = 0;
-
-    // Visit positions 1 .. dim-1 along the reduced axis.
-    int pos = 1;
-    while(pos < dim)
-    {
-        coord[axis] = pos;
-
-#if OP_CODE == 1 // ArgMax
-        best = max(best, *((__global DATA_TYPE *)tensor4D_offset(&src, coord[0], coord[1], coord[2], coord[3])));
-#elif OP_CODE == 2 // ArgMin
-        best = min(best, *((__global DATA_TYPE *)tensor4D_offset(&src, coord[0], coord[1], coord[2], coord[3])));
-#else // Unsupported OP_CODE: leave without writing the output
-        return;
-#endif
-
-        // The running extreme only changes when this position strictly beats it,
-        // so the first occurrence of the extreme value is the one reported.
-        if(best != prev_best)
-        {
-            best_pos  = pos;
-            prev_best = best;
-        }
-
-        ++pos;
-    }
-
-    *((__global uint *)dst.ptr) = best_pos;
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl
deleted file mode 100644
index 2ed698951..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_ex.cl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-// SUB(x, y): saturating subtraction when SATURATE is defined, plain subtraction otherwise.
-// The plain form is fully parenthesised so it stays one operand inside larger expressions.
-#ifdef SATURATE
-#define SUB(x, y) sub_sat((x), (y))
-#else /* SATURATE */
-#define SUB(x, y) ((x) - (y))
-#endif /* SATURATE */
-
-/** This function subtracts one tensor from another.
- *
- * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=short
- * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
- *
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: U8, S16
- * @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] in2_ptr Pointer to the source tensor. Supported data types: U8, S16
- * @param[in] in2_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8, S16
- * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void arithmetic_sub_ex(
-    TENSOR3D_DECLARATION(in1),
-    TENSOR3D_DECLARATION(in2),
-    TENSOR3D_DECLARATION(out))
-{
-    // Per-work-item views of both operands and of the destination
-    Tensor3D lhs = CONVERT_TO_TENSOR3D_STRUCT(in1);
-    Tensor3D rhs = CONVERT_TO_TENSOR3D_STRUCT(in2);
-    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(out);
-
-    __global DATA_TYPE_IN1 *lhs_ptr = (__global DATA_TYPE_IN1 *)lhs.ptr;
-    __global DATA_TYPE_IN2 *rhs_ptr = (__global DATA_TYPE_IN2 *)rhs.ptr;
-    __global DATA_TYPE_OUT *dst_ptr = (__global DATA_TYPE_OUT *)dst.ptr;
-
-    // Read 16 elements per operand and convert both to the output element type first
-    const VEC_DATA_TYPE(DATA_TYPE_OUT, 16) minuend    = CONVERT(vload16(0, lhs_ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
-    const VEC_DATA_TYPE(DATA_TYPE_OUT, 16) subtrahend = CONVERT(vload16(0, rhs_ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
-
-    // SUB is sub_sat when SATURATE is defined and a plain subtraction otherwise
-    vstore16(SUB(minuend, subtrahend), 0, dst_ptr);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
deleted file mode 100644
index 5cd0a4309..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers_asymm.h"
-
-// ADD/SUB: saturating built-ins when SATURATE is defined, plain operators otherwise.
-// The plain forms are fully parenthesised so they stay one operand inside larger expressions.
-#ifdef SATURATE
-#define ADD(x, y) add_sat((x), (y))
-#define SUB(x, y) sub_sat((x), (y))
-#else /* SATURATE */
-#define ADD(x, y) ((x) + (y))
-#define SUB(x, y) ((x) - (y))
-#endif /* SATURATE */
-
-/** Performs a pixelwise addition used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
- *
- * The following computations will be performed:
- *
- * -# Add offset terms to inputs
- * -# Get scaled value of two inputs
- * -# Add inputs
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- *
- * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The number of bits to shift left of input tensors must be passed at compile time using -DLEFT_SHIFT
- * @attention The offset, scalar scale factor and number of bits to shift right of input tensors must be passed at compile time using -DIN1_OFFSET, -DIN1_MULT_INT, -DIN1_SHIFT, -DIN2_OFFSET, -DIN2_MULT_INT and -DIN2_SHIFT
- * @attention The offset, scalar scale factor and number of bits to shift right of output tensor must be passed at compile time using -DRESULT_OFFSET, -DRESULT_MULT_INT and -DRESULT_SHIFT
- *
- * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The inputs and output scale information of qasymm8 need to be passed at compile time using -DSCALE_IN1, -DSCALE_IN2 and -DSCALE_OUT:
- * e.g. -DSCALE_IN1=1.f -DSCALE_IN2=1.f -DSCALE_OUT=2.f
- * @attention The inputs and output scale offset need to be passed at compile time using -DOFFSET_IN1, -DOFFSET_IN2 and -DOFFSET_OUT:
- * e.g. -DOFFSET_IN1=0 -DOFFSET_IN2=0 -DOFFSET_OUT=0
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
- *
- * @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8
- * @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] in2_ptr Pointer to the source tensor. Supported data types: QASYMM8
- * @param[in] in2_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void arithmetic_add_qasymm8(
-    TENSOR3D_DECLARATION(in1),
-    TENSOR3D_DECLARATION(in2),
-    TENSOR3D_DECLARATION(out))
-{
-    // Get pixels pointer
-    Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
-    Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
-    Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
-    // Load 16 elements from each input and widen them to int32
-    VEC_DATA_TYPE(int, 16)
-    in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16));
-    VEC_DATA_TYPE(int, 16)
-    in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16));
-
-    // Add the per-input offset terms
-    VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET);
-    VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET);
-
-    // Multiply both inputs by 2^LEFT_SHIFT before they are rescaled
-    VEC_DATA_TYPE(int, 16) left_shift = (VEC_DATA_TYPE(int, 16))1 << (VEC_DATA_TYPE(int, 16))(LEFT_SHIFT);
-    VEC_DATA_TYPE(int, 16) shifted_in1_val = in1_val * left_shift;
-    VEC_DATA_TYPE(int, 16) shifted_in2_val = in2_val * left_shift;
-
-    // Get scaled value of two inputs, each with its own multiplier and shift
-    VEC_DATA_TYPE(int, 16) scaled_in1_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in1_val, IN1_MULT_INT, IN1_SHIFT, 16);
-    VEC_DATA_TYPE(int, 16) scaled_in2_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in2_val, IN2_MULT_INT, IN2_SHIFT, 16);
-
-    // Add inputs and multiply with a multiplier smaller than 1
-    VEC_DATA_TYPE(int, 16) sum_val = scaled_in1_val + scaled_in2_val;
-    VEC_DATA_TYPE(int, 16) out_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(sum_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
-    out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
-
-    // Clamp to [0..255] while narrowing to 8 bits. A plain integer conversion would
-    // wrap out-of-range accumulator values (e.g. 256 -> 0) instead of saturating them.
-    VEC_DATA_TYPE(uchar, 16) res = convert_uchar16_sat(out_val);
-
-// TODO: Apply min-max BOUND to support fuse with relu.
-/*
-#if defined(MIN_BOUND)
-    res = max(res, (uchar16)MIN_BOUND);
-#endif // defined(MIN_BOUND)
-#if defined(MAX_BOUND)
-    res = min(res, (uchar16)MAX_BOUND);
-#endif // defined(MAX_BOUND)
-*/
-
-    // Store result
-    VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)),
-               0, (__global DATA_TYPE_OUT *)out.ptr);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl
deleted file mode 100644
index ad6a48a02..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT)
-/** Perform batch to space rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor batch should be given as a preprocessor argument using -DBATCH_OUT=size. e.g. -DBATCH_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE0=size. e.g. -DBLOCK_SIZE0=1
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void batch_to_space_nd(
-    TENSOR4D_DECLARATION(input),
-    TENSOR4D_DECLARATION(output))
-{
-    Tensor4D in  = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
-    Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
-    // Output coordinate handled by this work-item
-    const int out_w = get_global_id(0);             // W
-    const int out_h = get_global_id(1);             // H
-    const int out_c = get_global_id(2) % DEPTH_OUT; // C
-    const int out_n = get_global_id(2) / DEPTH_OUT; // N
-
-    // Position of the output element inside its BLOCK_SIZE0 x BLOCK_SIZE1 (H x W) block
-    const int block_row = out_h % BLOCK_SIZE0;
-    const int block_col = out_w % BLOCK_SIZE1;
-
-    // Source coordinate. The in-block position is flattened row-major, so the row
-    // index is multiplied by the block WIDTH (BLOCK_SIZE1). Multiplying by BLOCK_SIZE0
-    // gives the same value for square blocks only; for non-square blocks it makes
-    // different block positions collide or select a batch past the end of the input.
-    const int in_w = out_w / BLOCK_SIZE1;
-    const int in_h = out_h / BLOCK_SIZE0;
-    const int in_c = out_c;
-    const int in_n = out_n + (block_row * BLOCK_SIZE1 + block_col) * BATCH_OUT;
-
-    *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_w, in_h, in_c, in_n));
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
deleted file mode 100644
index bea61f53e..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-// Default vector width of 1 when VEC_SIZE is not supplied at build time.
-#if !defined(VEC_SIZE)
-#define VEC_SIZE 1
-#endif // !defined(VEC_SIZE)
-
-#if defined(OP_CODE) && defined(DATA_TYPE)
-/** returns truth value of the two input tensors for BINARY LOGICAL OP.
- * where BINARY LOGICAL OP can be AND, OR.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=uchar
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using
- * -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input1_ptr Pointer to the source tensor. Supported data types: QASYMM8
- * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[in] input2_ptr Pointer to the source tensor.Supported data types: QASYMM8
- * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- */
-__kernel void binary_logical_op(
-    TENSOR3D_DECLARATION(input1),
-    TENSOR3D_DECLARATION(input2),
-    TENSOR3D_DECLARATION(output))
-{
-    Tensor3D input1 = CONVERT_TO_TENSOR3D_STRUCT(input1);
-    Tensor3D input2 = CONVERT_TO_TENSOR3D_STRUCT(input2);
-    Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
-    // Each branch loads VEC_SIZE elements from both inputs, combines them with the
-    // selected logical operator and stores the result converted back to DATA_TYPE.
-#if OP_CODE == 1 // LOGICAL AND
-    VSTORE(VEC_SIZE)
-    (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) && VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
-             VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
-     0, (__global DATA_TYPE *)output.ptr);
-
-#elif OP_CODE == 2 // LOGICAL OR
-    VSTORE(VEC_SIZE)
-    (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) || VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr),
-             VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
-     0, (__global DATA_TYPE *)output.ptr);
-
-#else // OP NOT SUPPORTED
-    // Nothing is written for an unknown OP_CODE. The terminating semicolon is
-    // required; without it this branch does not compile.
-    return;
-
-#endif
-}
-#endif //if defined(OP_CODE) && defined(DATA_TYPE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
deleted file mode 100644
index 3d4675e5d..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-// Build-time defaults, used only when the corresponding -D option is not supplied:
-// scale 1.0, offset 0 and a vector width of 1.
-#if !defined(SCALE)
-#define SCALE 1.0f
-#endif // !defined(SCALE)
-#if !defined(OFFSET)
-#define OFFSET 0
-#endif // !defined(OFFSET)
-#if !defined(VEC_SIZE)
-#define VEC_SIZE 1
-#endif // !defined(VEC_SIZE)
-
-#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
-/** Perform a cast operation on an input tensor.
- *
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void cast(
-    TENSOR3D_DECLARATION(input),
-    TENSOR3D_DECLARATION(output))
-{
-    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
-    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);
-
-    // Read VEC_SIZE source elements
-    const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) loaded =
-        VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)src.ptr);
-
-    // Convert them to the output element type and write them out
-    VSTORE(VEC_SIZE)(CONVERT(loaded, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)),
-                     0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-/** Perform a cast operation on a QASYMM8 input tensor.
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Offset and Scale of input should be given as a preprocessor argument using -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void cast_qasymm_in(
-    TENSOR3D_DECLARATION(input),
-    TENSOR3D_DECLARATION(output))
-{
-    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
-    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);
-
-    // Read VEC_SIZE quantized elements
-    const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) quantized =
-        VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)src.ptr);
-
-    // Widen to int and subtract OFFSET
-    const VEC_DATA_TYPE(int, VEC_SIZE) centered =
-        CONVERT(quantized, VEC_DATA_TYPE(int, VEC_SIZE)) - (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET);
-
-    // Convert to float and multiply by SCALE
-    const VEC_DATA_TYPE(float, VEC_SIZE) dequantized =
-        CONVERT(centered, VEC_DATA_TYPE(float, VEC_SIZE)) * (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE);
-
-    // Convert to the requested output element type and store
-    VSTORE(VEC_SIZE)(CONVERT(dequantized, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)),
-                     0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-
-
-/** Perform a cast operation on a QASYMM8 output tensor.
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Offset and Scale of output should be given as a preprocessor argument using -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void cast_qasymm_out(
-    TENSOR3D_DECLARATION(input),
-    TENSOR3D_DECLARATION(output))
-{
-    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
-    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);
-
-    // Read VEC_SIZE source elements
-    const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) loaded =
-        VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)src.ptr);
-
-    // Convert to float and divide by SCALE
-    const VEC_DATA_TYPE(float, VEC_SIZE) scaled =
-        CONVERT(loaded, VEC_DATA_TYPE(float, VEC_SIZE)) / (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE);
-
-    // Add OFFSET (converted from int to float)
-    const VEC_DATA_TYPE(float, VEC_SIZE) shifted =
-        scaled + CONVERT((VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET), VEC_DATA_TYPE(float, VEC_SIZE));
-
-    // NOTE(review): the final step uses CONVERT rather than an explicitly rounding or
-    // saturating conversion - confirm this is the intended quantization behaviour.
-    VSTORE(VEC_SIZE)(CONVERT(shifted, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)),
-                     0, (__global DATA_TYPE_OUT *)dst.ptr);
-}
-#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl
deleted file mode 100644
index 765072556..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op.cl
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) && defined(OP_CODE)
-/** Returns truth value of comparison operators.
- * Comparison operators may be equal, not_equal etc.
- *
- * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT,
- * e.g. -DDATA_TYPE_IN=float -DDATA_TYPE_OUT=uchar
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
- * @attention The operation code selecting which comparison to perform should be passed as a preprocessor argument using
- * -DOP_CODE=number: 1 selects EQUAL and 2 selects NOT_EQUAL, e.g. -DOP_CODE=1
- *
- * @param[in] input1_ptr Pointer to the source tensor. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[in] input2_ptr Pointer to the source tensor. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void comparison_op(
- TENSOR3D_DECLARATION(input1),
- TENSOR3D_DECLARATION(input2),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D lhs = CONVERT_TO_TENSOR3D_STRUCT(input1);
- Tensor3D rhs = CONVERT_TO_TENSOR3D_STRUCT(input2);
- Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- #if OP_CODE == 1 // EQUAL
- const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) a = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)lhs.ptr);
- const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) b = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)rhs.ptr);
- // The comparison result is converted to DATA_TYPE_OUT before being stored.
- VSTORE(VEC_SIZE)(CONVERT(a == b, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-
- #elif OP_CODE == 2 // NOT_EQUAL
- const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) a = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)lhs.ptr);
- const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) b = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)rhs.ptr);
- // The comparison result is converted to DATA_TYPE_OUT before being stored.
- VSTORE(VEC_SIZE)(CONVERT(a != b, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)dst.ptr);
-
- #else // OP NOT SUPPORTED: nothing is written to the output
- return;
-
- #endif
-}
-#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) && defined(OP_CODE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl
deleted file mode 100644
index 1eb305f7b..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-#define SUB(x, y) ((x) - (y)) // element-wise difference; fully parenthesized so the expansion stays one operand inside larger expressions
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(DATA_TYPE_OUT)
-
-#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
-#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_OUT VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
-
-/** Returns the truth value of an element-wise comparison of two QASYMM8 tensors.
- * @attention Offset and Scale of both input should be given as a preprocessor argument using -DOFFSET_IN1=int, -DOFFSET_IN2=int, -DSCALE_IN1=float and -DSCALE_IN2=float. e.g. -DOFFSET_IN1=1, -DOFFSET_IN2=0, -DSCALE_IN1=0.5, -DSCALE_IN2=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using
- * -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input1_ptr Pointer to the source tensor. Supported data types: QASYMM8
- * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[in] input2_ptr Pointer to the source tensor. Supported data types: QASYMM8
- * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void comparison_op_qasymm8(
- TENSOR3D_DECLARATION(in1),
- TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out))
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- // Widen the raw uchar inputs to int so the offset subtraction below can yield negative values.
- VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
- VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
- // Remove the per-input quantization offsets.
- in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
- in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
- // Dequantize to float: real = (q - offset) * scale, so inputs with different parameters compare correctly.
- const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
- const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
- // The operation is selected with -DOP_CODE, the flag this kernel documents; the legacy OPCODE spelling is still accepted.
- #if OP_CODE == 1 || OPCODE == 1 //EQUAL QUANTIZED
- VSTORE(VEC_SIZE)(CONVERT(in1f32 == in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr);
-
- #elif OP_CODE == 2 || OPCODE == 2 //NOT EQUAL QUANTIZED
- VSTORE(VEC_SIZE)(CONVERT(in1f32 != in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr);
-
- #else // OP NOT SUPPORTED: nothing is written to the output
- return;
-
- #endif
-}
-#endif // defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(DATA_TYPE_OUT)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
deleted file mode 100644
index fef2243e7..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE)
-/** Perform depth to space rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as
- * @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem
- * (in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void depth_to_space(
- TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output))
- {
- // NOTE(review): the input struct is built with mod_size 0 — confirm the helper in helpers.h tolerates that.
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- // Output coordinates of this work-item; the Z id packs channel (z % DEPTH_OUT) and batch (z / DEPTH_OUT).
- const int out_x = get_global_id(0); // W
- const int out_y = get_global_id(1); // H
- const int out_c = get_global_id(2) % DEPTH_OUT; // C
- const int out_b = get_global_id(2) / DEPTH_OUT; // B
-
- // The position inside the BLOCK_SIZE x BLOCK_SIZE tile selects which group of DEPTH_OUT input channels is read.
- const int block_id = (out_y % BLOCK_SIZE) * BLOCK_SIZE + out_x % BLOCK_SIZE;
- const int in_x = out_x / BLOCK_SIZE;
- const int in_y = out_y / BLOCK_SIZE;
- const int in_c = out_c + block_id * DEPTH_OUT;
-
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, in_c, out_b));
- }
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
deleted file mode 100644
index 348458fe9..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS)
-/** Perform embedding_lookup of input tensor
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16
- * @attention Number of input dimensions are passed as a preprocessor argument using -DNUM_DIMS=size, e.g. -DNUM_DIMS=4
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] lookups_ptr Pointer to the lookups vector. Supported data types: S32
- * @param[in] lookups_stride_x Stride of the lookups vector in X dimension (in bytes)
- * @param[in] lookups_step_x lookups_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] lookups_offset_first_element_in_bytes The offset of the first element in the lookups vector
- */
-
-__kernel void embedding_lookup(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- VECTOR_DECLARATION(lookups))
-{
- Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT);
- Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
- Vector ids = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups);
-
- // Work-item coordinates; the Z id packs depth (z % DEPTH_OUT) and batch (z / DEPTH_OUT).
- const size_t gx = get_global_id(0);
- const size_t gy = get_global_id(1);
- const size_t gz = get_global_id(2);
-
- // Source coordinates: dimension NUM_DIMS is read from the lookups vector, the others
- // come straight from the work-item id.
- const int x = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&ids, gx)) : gx;
- const int y = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&ids, gy)) : gy;
- const int z = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&ids, gz)) : gz % DEPTH_OUT;
- const int w = (NUM_DIMS == 4) ? *((__global int *)vector_offset(&ids, gz / DEPTH_OUT)) : gz / DEPTH_OUT;
-
- // NOTE(review): input_offset_first_element_in_bytes is added explicitly here; confirm that
- // CONVERT_TO_TENSOR4D_STRUCT_NO_STEP has not already applied it to src.ptr.
- src.ptr += input_offset_first_element_in_bytes + x * input_step_x + y * input_step_y + z * input_step_z + w * input_step_w;
-
- VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
- 0, (__global DATA_TYPE *)dst.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl
deleted file mode 100644
index 69d94f30a..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/exp.cl
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE)
-/** Perform an exponential operation on an input tensor.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @note Can only take floating point data types.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void exp_layer(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) x = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); // VEC_SIZE elements per work-item
- VSTORE(VEC_SIZE)(exp(x), 0, (__global DATA_TYPE *)dst.ptr); // element-wise e^x
-}
-#endif // defined(DATA_TYPE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl
deleted file mode 100644
index 6b767d6c9..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/gather.cl
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-/** Perform gather
- *
- * @note Data types should be given as preprocessor arguments using -DDATA_TYPE_IN1=type, -DDATA_TYPE_IN2=type and -DDATA_TYPE_OUT=type. e.g. -DDATA_TYPE_IN1=short
- *
- * @param[in] input1_ptr Pointer to the first source tensor. Supported data types: U8/S32/F32
- * @param[in] input1_stride_x Stride of the first source tensor in X dimension (in bytes)
- * @param[in] input1_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the first source tensor in Y dimension (in bytes)
- * @param[in] input1_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the first source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the first source tensor
- * @param[in] input2_ptr Pointer to the second source tensor (the indices to gather). Supported data types: U32
- * @param[in] input2_stride_x Stride of the second source tensor in X dimension (in bytes)
- * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the second source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void gather(IMAGE_DECLARATION(input1),
- VECTOR_DECLARATION(input2),
- IMAGE_DECLARATION(output))
-{
- Image in1 = CONVERT_TO_IMAGE_STRUCT_NO_STEP(input1);
- Vector in2 = CONVERT_TO_VECTOR_STRUCT(input2);
- Image out = CONVERT_TO_IMAGE_STRUCT_NO_STEP(output);
-
- // Only one index is consumed per work-item, so load exactly one element; the previous vload2
- // also read the neighbouring element, which can lie past the end of input2 for the last index.
- const int index = *((__global DATA_TYPE_IN2 *)in2.ptr);
-
- //TODO: performance tuning for memcopy
- const int row = get_global_id(0); // output row written by this work-item
- const int stride = input1_stride_y / input1_stride_x; // elements per input1 row, derived from the byte strides
-
- for(int i = 0; i < stride; i++){
-  *((__global DATA_TYPE_OUT *)offset(&out, i, row)) = *((__global DATA_TYPE_IN1 *)offset(&in1, i, index));
- }
-}
-
-__kernel void gather_1d_out(IMAGE_DECLARATION(input1),
- VECTOR_DECLARATION(input2),
- VECTOR_DECLARATION(output))
-{
- Image in1 = CONVERT_TO_IMAGE_STRUCT_NO_STEP(input1);
- Vector in2 = CONVERT_TO_VECTOR_STRUCT(input2);
- Vector out = CONVERT_TO_VECTOR_STRUCT_NO_STEP(output);
-
- // Only one index is consumed per work-item, so load exactly one element; the previous vload2
- // also read the neighbouring element, which can lie past the end of input2 for the last index.
- const int index = *((__global DATA_TYPE_IN2 *)in2.ptr);
- const int stride = input1_stride_y / input1_stride_x; // elements per input1 row, derived from the byte strides
-
- //TODO: performance tuning for memcopy
- // NOTE(review): writing element i + gid makes neighbouring work-items overlap when stride > 1 — confirm this is intended.
- for(int i = 0; i < stride; i++){
-  *((__global DATA_TYPE_OUT *)vector_offset(&out, i+get_global_id(0)))=*((__global DATA_TYPE_IN1 *)offset(&in1, i, index));
- }
-}
-
-__kernel void gather_1d(VECTOR_DECLARATION(input1),
- VECTOR_DECLARATION(input2),
- VECTOR_DECLARATION(output))
-{
- Vector in1 = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input1);
- Vector in2 = CONVERT_TO_VECTOR_STRUCT(input2);
- Vector out = CONVERT_TO_VECTOR_STRUCT_NO_STEP(output);
-
- // Only one index is consumed per work-item, so load exactly one element; the previous vload2
- // also read the neighbouring element, which can lie past the end of input2 for the last index.
- const int index = *((__global DATA_TYPE_IN2 *)in2.ptr);
-
- //TODO: performance tuning for memcopy
- *((__global DATA_TYPE_OUT *)vector_offset(&out, get_global_id(0)))=*((__global DATA_TYPE_IN1 *)vector_offset(&in1, index));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
deleted file mode 100644
index ed7409852..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS)
-/** Perform hashtable_lookup of input tensor
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=depth. e.g. -DDEPTH_OUT=16
- * @attention Number of input dimensions are passed as a preprocessor argument using -DNUM_DIMS=size, e.g. -DNUM_DIMS=4
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] lookups_ptr Pointer to the lookups vector. Supported data types: S32
- * @param[in] lookups_stride_x Stride of the lookups vector in X dimension (in bytes)
- * @param[in] lookups_step_x lookups_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] lookups_offset_first_element_in_bytes The offset of the first element in the lookups vector
- */
-__kernel void hashtable_lookup(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- VECTOR_DECLARATION(lookups))
-{
- Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, DEPTH_OUT);
- Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
- Vector ids = CONVERT_TO_VECTOR_STRUCT_NO_STEP(lookups);
-
- // Work-item coordinates; the Z id packs depth (z % DEPTH_OUT) and batch (z / DEPTH_OUT).
- const size_t gx = get_global_id(0);
- const size_t gy = get_global_id(1);
- const size_t gz = get_global_id(2);
-
- // Source coordinates: dimension NUM_DIMS is read from the lookups vector, the others
- // come straight from the work-item id.
- int coord[4] = {0};
- coord[0] = (NUM_DIMS == 1) ? *((__global int *)vector_offset(&ids, gx)) : gx;
- coord[1] = (NUM_DIMS == 2) ? *((__global int *)vector_offset(&ids, gy)) : gy;
- coord[2] = (NUM_DIMS == 3) ? *((__global int *)vector_offset(&ids, gz)) : gz % DEPTH_OUT;
- coord[3] = (NUM_DIMS == 4) ? *((__global int *)vector_offset(&ids, gz / DEPTH_OUT)) : gz / DEPTH_OUT;
-
- // A negative looked-up index produces an all-zero output vector instead of a copy.
- if (coord[NUM_DIMS - 1] < 0)
- {
-  VSTORE(VEC_SIZE)((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0, 0, (__global DATA_TYPE *)dst.ptr);
-  return;
- }
-
- src.ptr += input_offset_first_element_in_bytes + coord[0] * input_step_x + coord[1] * input_step_y + coord[2] * input_step_z + coord[3] * input_step_w;
-
- VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), 0, (__global DATA_TYPE *)dst.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(NUM_DIMS)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
deleted file mode 100644
index 0e123ae0a..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_HELPER_H
-#define ARM_COMPUTE_HELPER_H
-
-#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(cl_khr_fp16)
-
-#if defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
-#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
-#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ENABLED) && defined(cl_arm_integer_dot_product_int8)
-
-#if defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) && \
- defined(cl_arm_integer_dot_product_accumulate_int8)
-#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
-#endif // defined(ARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED) &&
- // defined(cl_arm_integer_dot_product_accumulate_int8)
-
-#if defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
-#pragma OPENCL EXTENSION cl_arm_printf : enable
-#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
-
-#define EXPAND(x) x
-
-#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
-
-#define VLOAD_STR(size) vload##size
-#define VLOAD(size) VLOAD_STR(size)
-
-#define VSTORE_STR(size) vstore##size
-#define VSTORE(size) VSTORE_STR(size)
-
-#define VEC_DATA_TYPE_STR(type, size) type##size
-#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
-
-#define CL_VEC_DATA_TYPE_STR(type, size) type##size
-#define CL_VEC_DATA_TYPE(type, size) CL_VEC_DATA_TYPE_STR(type, size)
-
-#define CONVERT_STR(x, type) (convert_##type((x)))
-#define CONVERT(x, type) CONVERT_STR(x, type)
-
-#define CONVERT_SAT_STR(x, type) (convert_##type##_sat((x)))
-#define CONVERT_SAT(x, type) CONVERT_SAT_STR(x, type)
-
-#define CONVERT_SAT_ROUND_STR(x, type, round) (convert_##type##_sat_##round((x)))
-#define CONVERT_SAT_ROUND(x, type, round) CONVERT_SAT_ROUND_STR(x, type, round)
-
-#define VECTOR_DECLARATION(name) \
- __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, \
- uint name##_offset_first_element_in_bytes
-
-#define IMAGE_DECLARATION(name) \
- __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_offset_first_element_in_bytes
-
-#define TENSOR3D_DECLARATION(name) \
- __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, \
- uint name##_offset_first_element_in_bytes
-
-#define TENSOR4D_DECLARATION(name) \
- __global uchar *name##_ptr, uint name##_stride_x, uint name##_step_x, uint name##_stride_y, \
- uint name##_step_y, uint name##_stride_z, uint name##_step_z, uint name##_stride_w, \
- uint name##_step_w, uint name##_offset_first_element_in_bytes
-
-#define CONVERT_TO_VECTOR_STRUCT(name) \
- update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
- name##_step_x)
-
-#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \
- update_vector_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0)
-
-#define CONVERT_TO_IMAGE_STRUCT(name) \
- update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
- name##_step_x, name##_stride_y, name##_step_y)
-
-#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \
- update_image_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, 0, \
- name##_stride_y, 0)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
- update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
- name##_stride_x, name##_step_x, name##_stride_y, \
- name##_step_y, name##_stride_z, name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \
- update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
- name##_stride_x, 0, name##_stride_y, 0, name##_stride_z, \
- name##_step_z)
-
-#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \
- update_image_from_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, \
- name##_stride_x, name##_step_x, name##_stride_y, \
- name##_step_y, name##_stride_z, name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT(name) \
- update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
- name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \
- name##_step_z)
-
-#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \
- update_tensor3D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
- 0, name##_stride_y, 0, name##_stride_z, 0)
-
-#define CONVERT_TO_TENSOR4D_STRUCT(name, mod_size) \
- update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
- name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, \
- name##_step_z, name##_stride_w, name##_step_w, mod_size)
-
-#define CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(name, mod_size) \
- update_tensor4D_workitem_ptr(name##_ptr, name##_offset_first_element_in_bytes, name##_stride_x, \
- 0, name##_stride_y, 0, name##_stride_z, 0, name##_stride_w, 0, \
- mod_size)
-
-/** Structure to hold Vector information */
-typedef struct Vector
-{
- __global uchar *ptr; /**< Pointer to the starting position of the buffer */
- int offset_first_element_in_bytes; /**< The offset of the first element in the source vector */
- int stride_x; /**< Stride of the vector in X dimension (in bytes) */
-} Vector;
-
-/** Structure to hold Image information */
-typedef struct Image
-{
- __global uchar *ptr; /**< Pointer to the starting position of the buffer */
- int offset_first_element_in_bytes; /**< The offset of the first element in the source image */
- int stride_x; /**< Stride of the image in X dimension (in bytes) */
- int stride_y; /**< Stride of the image in Y dimension (in bytes) */
-} Image;
-
-/** Structure to hold 3D tensor information */
-typedef struct Tensor3D
-{
- __global uchar *ptr; /**< Pointer to the starting position of the buffer */
- int offset_first_element_in_bytes; /**< The offset of the first element in the source tensor */
- int stride_x; /**< Stride of the tensor in X dimension (in bytes) */
- int stride_y; /**< Stride of the tensor in Y dimension (in bytes) */
- int stride_z; /**< Stride of the tensor in Z dimension (in bytes) */
-} Tensor3D;
-
-/** Structure to hold 4D tensor information */
-typedef struct Tensor4D
-{
- __global uchar *ptr; /**< Pointer to the starting position of the buffer */
- int offset_first_element_in_bytes; /**< The offset of the first element in the source tensor */
- int stride_x; /**< Stride of the tensor in X dimension (in bytes) */
- int stride_y; /**< Stride of the tensor in Y dimension (in bytes) */
- int stride_z; /**< Stride of the tensor in Z dimension (in bytes) */
- int stride_w; /**< Stride of the tensor in W dimension (in bytes) */
-} Tensor4D;
-
-/** Wrap vector information into a Vector structure, and make the pointer point at this workitem's
- * data.
- *
- * @param[in] ptr Pointer to the starting position of the buffer
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector
- * @param[in] stride_x Stride of the vector in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per
- * workitem(in bytes)
- *
- * @return A vector object
- */
-inline Vector update_vector_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes,
- uint stride_x, uint step_x)
-{
- Vector vector = {
- .ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
- };
- vector.ptr += vector.offset_first_element_in_bytes + get_global_id(0) * step_x;
- return vector;
-}
-
-/** Wrap image information into an Image structure, and make the pointer point at this workitem's
- * data.
- *
- * @param[in] ptr Pointer to the starting position of the buffer
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] stride_x Stride of the image in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per
- * workitem(in bytes)
- * @param[in] stride_y Stride of the image in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per
- * workitem(in bytes)
- *
- * @return An image object
- */
-inline Image update_image_workitem_ptr(__global uchar *ptr, uint offset_first_element_in_bytes,
- uint stride_x, uint step_x, uint stride_y, uint step_y)
-{
- Image img = {.ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
- .stride_y = stride_y};
- img.ptr +=
- img.offset_first_element_in_bytes + get_global_id(0) * step_x + get_global_id(1) * step_y;
- return img;
-}
-
-/** Wrap 3D tensor information into an Image structure, and make the pointer point at this
- * workitem's data.
- *
- * @param[in] ptr Pointer to the starting position of the buffer
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] stride_x Stride of the tensor in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per
- * workitem(in bytes)
- * @param[in] stride_y Stride of the tensor in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per
- * workitem(in bytes)
- * @param[in] stride_z Stride of the tensor in Z dimension (in bytes); not stored in the result
- * @param[in] step_z stride_z * number of elements along Z processed per
- * workitem(in bytes)
- *
- * @return An image object
- */
-inline Image update_image_from_tensor3D_workitem_ptr(__global uchar *ptr,
- uint offset_first_element_in_bytes,
- uint stride_x, uint step_x, uint stride_y,
- uint step_y, uint stride_z, uint step_z)
-{
- Image img = {.ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
- .stride_y = stride_y};
- img.ptr += img.offset_first_element_in_bytes + get_global_id(0) * step_x +
- get_global_id(1) * step_y + get_global_id(2) * step_z;
- return img;
-}
-
-/** Wrap 3D tensor information into a Tensor3D structure, and make the pointer point at this
- * workitem's data.
- *
- * @param[in] ptr Pointer to the starting position of the buffer
- * @param[in] offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] stride_x Stride of the tensor in X dimension (in bytes)
- * @param[in] step_x stride_x * number of elements along X processed per
- * workitem(in bytes)
- * @param[in] stride_y Stride of the tensor in Y dimension (in bytes)
- * @param[in] step_y stride_y * number of elements along Y processed per
- * workitem(in bytes)
- * @param[in] stride_z Stride of the tensor in Z dimension (in bytes)
- * @param[in] step_z stride_z * number of elements along Z processed per
- * workitem(in bytes)
- *
- * @return A 3D tensor object
- */
-inline Tensor3D update_tensor3D_workitem_ptr(__global uchar *ptr,
- uint offset_first_element_in_bytes, uint stride_x,
- uint step_x, uint stride_y, uint step_y, uint stride_z,
- uint step_z)
-{
- Tensor3D tensor = {.ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
- .stride_y = stride_y,
- .stride_z = stride_z};
- tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x +
- get_global_id(1) * step_y + get_global_id(2) * step_z;
- return tensor;
-}
-
-inline Tensor4D update_tensor4D_workitem_ptr(__global uchar *ptr,
- uint offset_first_element_in_bytes, uint stride_x,
- uint step_x, uint stride_y, uint step_y, uint stride_z,
- uint step_z, uint stride_w, uint step_w, uint mod_size)
-{
- Tensor4D tensor = {.ptr = ptr,
- .offset_first_element_in_bytes = offset_first_element_in_bytes,
- .stride_x = stride_x,
- .stride_y = stride_y,
- .stride_z = stride_z,
- .stride_w = stride_w};
- // Global id 2 carries both Z and W: the Z index is id % mod_size and the W index is id / mod_size.
- tensor.ptr += tensor.offset_first_element_in_bytes + get_global_id(0) * step_x +
- get_global_id(1) * step_y + (get_global_id(2) % mod_size) * step_z +
- (get_global_id(2) / mod_size) * step_w;
- return tensor;
-}
-
-/** Get the pointer position of a Vector
- *
- * @param[in] vec Pointer to the Vector structure describing the buffer
- * @param[in] x Relative X position
- */
-inline __global const uchar *vector_offset(const Vector *vec, int x)
-{
- return vec->ptr + x * vec->stride_x;
-}
-
-/** Get the pointer position of an Image
- *
- * @param[in] img Pointer to the Image structure describing the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- */
-inline __global uchar *offset(const Image *img, int x, int y)
-{
- return img->ptr + x * img->stride_x + y * img->stride_y;
-}
-
-/** Get the pointer position of a Tensor3D
- *
- * @param[in] tensor Pointer to the Tensor3D structure describing the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- * @param[in] z Relative Z position
- */
-inline __global const uchar *tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
-{
- return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z;
-}
-
-/** Get the pointer position of a Tensor4D
- *
- * @param[in] tensor Pointer to the Tensor4D structure describing the buffer
- * @param[in] x Relative X position
- * @param[in] y Relative Y position
- * @param[in] z Relative Z position
- * @param[in] w Relative W position
- */
-inline __global const uchar *tensor4D_offset(const Tensor4D *tensor, int x, int y, int z, int w)
-{
- return tensor->ptr + x * tensor->stride_x + y * tensor->stride_y + z * tensor->stride_z +
- w * tensor->stride_w;
-}
-
-#endif // ARM_COMPUTE_HELPER_H
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
deleted file mode 100644
index c39138caa..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_HELPERS_ASYMM_H
-#define ARM_COMPUTE_HELPERS_ASYMM_H
-
-#include "helpers.h"
-
-/** Correctly-rounded-to-nearest division by a power-of-two.
- *
- * @param[in] size Size of vector.
- *
- * @return Correctly-rounded-to-nearest division by a power-of-two.
- */
-#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_divide_by_POW2_##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- VEC_DATA_TYPE(int, size) \
- mask = (1 << exponent) - 1; \
- const VEC_DATA_TYPE(int, size) zero = 0; \
- const VEC_DATA_TYPE(int, size) one = 1; \
- VEC_DATA_TYPE(int, size) \
- threshold = (mask >> 1) + select(zero, one, x < 0); \
- return (x >> exponent) + select(zero, one, (x & mask) > threshold); \
- }
-
-/** Product of two numbers, interpreting them as fixed-point values in the interval [-1, 1),
- * rounding to the nearest value, and saturating -1 * -1 to the maximum value.
- *
- * @param[in] size Size of vector.
- *
- * @return Product of two fixed-point numbers.
- */
-#define ASYMM_MULT_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_mult##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(int, size) \
- overflow = a == b && a == INT_MIN; \
- VEC_DATA_TYPE(long, size) \
- a_64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b_64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- ab_64 = a_64 * b_64; \
- /* COMPMID-907 */ \
- VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \
- return select(ab_x2_high32, INT_MAX, overflow); \
- }
-
-/** Calculates \f$ exp(x) \f$ for x in [-1/4, 0).
- *
- * @param[in] size Size of vector.
- *
- * @return Result in fixed-point format Q0.
- */
-#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(VEC_DATA_TYPE(int, size) \
- a) \
- { \
- const VEC_DATA_TYPE(int, size) constant_term = 1895147668; \
- const VEC_DATA_TYPE(int, size) constant_1_over_3 = 715827883; \
- const int k_fractional_bits = 31; \
- VEC_DATA_TYPE(int, size) \
- x = a + (1 << (k_fractional_bits - 3)); \
- VEC_DATA_TYPE(int, size) \
- x2 = ASYMM_MULT(x, x, size); \
- VEC_DATA_TYPE(int, size) \
- x3 = ASYMM_MULT(x2, x, size); \
- VEC_DATA_TYPE(int, size) \
- x4 = ASYMM_MULT(x2, x2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_4 = ASYMM_ROUNDING_DIVIDE_BY_POW2(x4, 2, size); \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2 = \
- ASYMM_MULT((x4_over_4 + x3), constant_1_over_3, size) + x2; \
- VEC_DATA_TYPE(int, size) \
- x4_over_24_plus_x3_over_6_plus_x2_over_2 = \
- ASYMM_ROUNDING_DIVIDE_BY_POW2(x4_over_24_plus_x3_over_6_plus_x2, 1, size); \
- return constant_term + \
- ASYMM_MULT(constant_term, x + x4_over_24_plus_x3_over_6_plus_x2_over_2, size); \
- }
-
-/** Each bit of the result is set to the corresponding bit of either then_val or
- * else_val depending on whether the corresponding bit of if_mask is set.
- * Equivalent to the VBSL instruction in ARM NEON.
- *
- * @param[in] size Size of vector.
- *
- * @returns Result containing bits from @p then_val or from @p else_val depending on whether the
- * corresponding bit in @p if_mask is set.
- */
-#define ASYMM_SELECT_USING_MASK_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) asymm_select_using_mask##size(VEC_DATA_TYPE(int, size) if_mask, \
- VEC_DATA_TYPE(int, size) then_val, \
- VEC_DATA_TYPE(int, size) else_val) \
- { \
- return (if_mask & then_val) ^ (~if_mask & else_val); \
- }
-
-/** For each element of input vector, the corresponding bits of the result item are set
- * if the input item is zero.
- *
- * @param[in] size Size of vector.
- *
- * @returns Output vector with bits set when corresponding bit in @p a is zero.
- */
-#define ASYMM_MASK_IF_ZERO_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) asymm_mask_if_zero##size(VEC_DATA_TYPE(int, size) a) \
- { \
- const VEC_DATA_TYPE(int, size) all_zeros = 0; \
- const VEC_DATA_TYPE(int, size) all_ones = ~0; \
- return select(all_zeros, all_ones, a == 0); \
- }
-
-/** For each element of input vector, the corresponding bits of the result item are set
- * if the input item is non-zero.
- *
- * @param[in] size Size of vector.
- *
- * @returns Output vector with bits set when corresponding bit in @p a is non zero.
- */
-#define ASYMM_MASK_IF_NON_ZERO_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) asymm_mask_if_non_zero##size(VEC_DATA_TYPE(int, size) a) \
- { \
- const VEC_DATA_TYPE(int, size) all_zeros = 0; \
- const VEC_DATA_TYPE(int, size) all_ones = ~0; \
- return select(all_zeros, all_ones, a != 0); \
- }
-
-#define EXP_BARREL_SHIFTER_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) exp_barrel_shifter##size( \
- VEC_DATA_TYPE(int, size) result, int exponent, int fp_multiplier, int k_integer_bits, \
- int k_fractional_bits, VEC_DATA_TYPE(int, size) remainder) \
- { \
- if (k_integer_bits > exponent) \
- { \
- const int k_shift_amount = k_integer_bits > exponent ? k_fractional_bits + exponent : 0; \
- return ASYMM_SELECT_USING_MASK( \
- ASYMM_MASK_IF_NON_ZERO(remainder & (1 << k_shift_amount), size), \
- ASYMM_MULT(result, fp_multiplier, size), result, size); \
- } \
- \
- return result; \
- }
-
-/** Calculates \f$ exp(x) \f$ for x < 0.
- *
- * @param[in] size Size of vector.
- *
- * @return Result in fixed-point format Q0.
- */
-#define ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_exp_on_negative_values##size(VEC_DATA_TYPE(int, size) a, int k_integer_bits) \
- { \
- const int k_fractional_bits = 31 - k_integer_bits; \
- VEC_DATA_TYPE(int, size) \
- k_one_quarter = 1 << (k_fractional_bits - 2); \
- VEC_DATA_TYPE(int, size) \
- mask = k_one_quarter - 1; \
- VEC_DATA_TYPE(int, size) \
- a_mod_quarter_minus_one_quarter = (a & mask) - k_one_quarter; \
- VEC_DATA_TYPE(int, size) \
- a_mod_quarter_minus_one_quarter_scaled = a_mod_quarter_minus_one_quarter << k_integer_bits; \
- VEC_DATA_TYPE(int, size) \
- result = ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL( \
- a_mod_quarter_minus_one_quarter_scaled, size); \
- VEC_DATA_TYPE(int, size) \
- remainder = a_mod_quarter_minus_one_quarter - a; \
- \
- result = EXP_BARREL_SHIFTER(result, -2, 1672461947, k_integer_bits, k_fractional_bits, \
- remainder, size); \
- result = EXP_BARREL_SHIFTER(result, -1, 1302514674, k_integer_bits, k_fractional_bits, \
- remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +0, 790015084, k_integer_bits, k_fractional_bits, \
- remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +1, 290630308, k_integer_bits, k_fractional_bits, \
- remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +2, 39332535, k_integer_bits, k_fractional_bits, \
- remainder, size); \
- result = EXP_BARREL_SHIFTER(result, +3, 720401, k_integer_bits, k_fractional_bits, remainder, \
- size); \
- result = \
- EXP_BARREL_SHIFTER(result, +4, 242, k_integer_bits, k_fractional_bits, remainder, size); \
- \
- if (k_integer_bits > 5) \
- { \
- const VEC_DATA_TYPE(int, size) clamp = -(1 << (k_fractional_bits + 5)); \
- result = ASYMM_SELECT_USING_MASK(ASYMM_MASK_IF_NON_ZERO(a < clamp, size), 0, result, size); \
- } \
- \
- const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \
- return ASYMM_SELECT_USING_MASK(ASYMM_MASK_IF_ZERO(a, size), Q0_one, result, size); \
- }
-
-/** Calculates the product of an integer value by a power of two, with either a positive exponent
- * (equivalent to an arithmetic left shift, saturating) or a negative exponent
- * (equivalent to an arithmetic right shift, rounding to nearest).
- *
- * @param[in] size Size of vector.
- *
- * @return Arithmetic left or right shift.
- */
-#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_saturating_rounding_mult_by_pow2##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- if (exponent < 0) \
- { \
- return ASYMM_ROUNDING_DIVIDE_BY_POW2(x, -exponent, size); \
- } \
- \
- const VEC_DATA_TYPE(int, size) min = INT_MIN; \
- const VEC_DATA_TYPE(int, size) max = INT_MAX; \
- int threshold = ((1 << (31 - exponent)) - 1); \
- VEC_DATA_TYPE(int, size) \
- positive_mask = ASYMM_MASK_IF_NON_ZERO(x > threshold, size); \
- VEC_DATA_TYPE(int, size) \
- negative_mask = ASYMM_MASK_IF_NON_ZERO(x < -threshold, size); \
- VEC_DATA_TYPE(int, size) \
- result = x << exponent; \
- result = ASYMM_SELECT_USING_MASK(positive_mask, max, result, size); \
- result = ASYMM_SELECT_USING_MASK(negative_mask, min, result, size); \
- return result; \
- }
-
-/** Calculates (a+b)/2, rounded to the nearest integer.
- * Equivalent to VRHADD in the ARM NEON instruction set.
- *
- * @param[in] size Size of vector.
- *
- * @return (a+b)/2, rounded to the nearest integer.
- */
-#define ASYMM_ROUNDING_HALF_SUM_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_half_sum##size(VEC_DATA_TYPE(int, size) a, VEC_DATA_TYPE(int, size) b) \
- { \
- VEC_DATA_TYPE(long, size) \
- a64 = convert_long##size(a); \
- VEC_DATA_TYPE(long, size) \
- b64 = convert_long##size(b); \
- VEC_DATA_TYPE(long, size) \
- sum = a64 + b64; \
- const VEC_DATA_TYPE(long, size) one = 1; \
- const VEC_DATA_TYPE(long, size) minus_one = -1; \
- VEC_DATA_TYPE(long, size) \
- sign = select(minus_one, one, sum >= 0); \
- return convert_int##size((sum + sign) / 2); \
- }
-
-/** Calculates \f$ 1 / (1 + x) \f$ for x in (0, 1).
- *
- * @param[in] size Size of vector.
- *
- * @return Result in fixed-point format Q0.
- */
-#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_one_over_one_plus_x_for_x_in_0_1##size(VEC_DATA_TYPE(int, size) a) \
- { \
- const VEC_DATA_TYPE(int, size) Q0_one = INT_MAX; \
- const VEC_DATA_TYPE(int, size) Q2_one = 1 << (31 - 2); \
- VEC_DATA_TYPE(int, size) \
- half_denominator = ASYMM_ROUNDING_HALF_SUM(a, Q0_one, size); \
- const VEC_DATA_TYPE(int, size) Q2_48_over_17 = 1515870810; \
- const VEC_DATA_TYPE(int, size) Q2_neg_32_over_17 = -1010580540; \
- VEC_DATA_TYPE(int, size) \
- x = Q2_48_over_17 + ASYMM_MULT(half_denominator, Q2_neg_32_over_17, size); \
- for (int i = 0; i < 3; i++) \
- { \
- VEC_DATA_TYPE(int, size) \
- half_denominator_times_x = ASYMM_MULT(half_denominator, x, size); \
- VEC_DATA_TYPE(int, size) \
- one_minus_half_denominator_times_x = Q2_one - half_denominator_times_x; \
- VEC_DATA_TYPE(int, size) \
- tmp = ASYMM_MULT(x, one_minus_half_denominator_times_x, size); \
- x = x + ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(tmp, 2, size); \
- } \
- return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(x, 1, size); \
- }
-
-/** Considering the integer value as fixed-point, change the number of integer bits and update value
- * accordingly.
- *
- * @param[in] size Size of vector.
- *
- * @return Rescaled value.
- */
-#define ASYMM_RESCALE_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) asymm_rescale##size(VEC_DATA_TYPE(int, size) value, \
- int src_integer_bits, int dst_integer_bits) \
- { \
- int exponent = src_integer_bits - dst_integer_bits; \
- return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \
- }
-
-#define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) \
- asymm_rounding_divide_by_POW2_##size(x, exponent)
-#define ASYMM_MULT(a, b, size) asymm_mult##size(a, b)
-#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \
- ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(x, quantized_multiplier, size), right_shift, size)
-#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL(a, size) \
- asymm_exp_on_interval_between_negative_one_quarter_and_0_excl##size(a)
-#define ASYMM_SELECT_USING_MASK(if_mask, then_val, else_val, size) \
- asymm_select_using_mask##size(if_mask, then_val, else_val)
-#define ASYMM_MASK_IF_ZERO(a, size) asymm_mask_if_zero##size(a)
-#define ASYMM_MASK_IF_NON_ZERO(a, size) asymm_mask_if_non_zero##size(a)
-#define EXP_BARREL_SHIFTER(result, exponent, fp_multiplier, k_integer_bits, k_fractional_bits, \
- remainder, size) \
- exp_barrel_shifter##size(result, exponent, fp_multiplier, k_integer_bits, k_fractional_bits, \
- remainder)
-#define ASYMM_EXP_ON_NEGATIVE_VALUES(a, k_integer_bits, size) \
- asymm_exp_on_negative_values##size(a, k_integer_bits)
-#define ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(a, size) \
- asymm_one_over_one_plus_x_for_x_in_0_1##size(a)
-#define ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(x, exponent, size) \
- asymm_saturating_rounding_mult_by_pow2##size(x, exponent)
-#define ASYMM_ROUNDING_HALF_SUM(a, b, size) asymm_rounding_half_sum##size(a, b)
-#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \
- asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
-
-ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2)
-ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4)
-ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8)
-ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16)
-
-ASYMM_MULT_IMPL(2)
-ASYMM_MULT_IMPL(4)
-ASYMM_MULT_IMPL(8)
-ASYMM_MULT_IMPL(16)
-
-ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(2)
-ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(4)
-ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(8)
-ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(16)
-
-ASYMM_SELECT_USING_MASK_IMPL(2)
-ASYMM_SELECT_USING_MASK_IMPL(4)
-ASYMM_SELECT_USING_MASK_IMPL(8)
-ASYMM_SELECT_USING_MASK_IMPL(16)
-
-ASYMM_MASK_IF_ZERO_IMPL(2)
-ASYMM_MASK_IF_ZERO_IMPL(4)
-ASYMM_MASK_IF_ZERO_IMPL(8)
-ASYMM_MASK_IF_ZERO_IMPL(16)
-
-ASYMM_MASK_IF_NON_ZERO_IMPL(2)
-ASYMM_MASK_IF_NON_ZERO_IMPL(4)
-ASYMM_MASK_IF_NON_ZERO_IMPL(8)
-ASYMM_MASK_IF_NON_ZERO_IMPL(16)
-
-EXP_BARREL_SHIFTER_IMPL(2)
-EXP_BARREL_SHIFTER_IMPL(4)
-EXP_BARREL_SHIFTER_IMPL(8)
-EXP_BARREL_SHIFTER_IMPL(16)
-
-ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(2)
-ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(4)
-ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(8)
-ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(16)
-
-ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(2)
-ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(4)
-ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(8)
-ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(16)
-
-ASYMM_ROUNDING_HALF_SUM_IMPL(2)
-ASYMM_ROUNDING_HALF_SUM_IMPL(4)
-ASYMM_ROUNDING_HALF_SUM_IMPL(8)
-ASYMM_ROUNDING_HALF_SUM_IMPL(16)
-
-ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(2)
-ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(4)
-ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(8)
-ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(16)
-
-ASYMM_RESCALE_IMPL(2)
-ASYMM_RESCALE_IMPL(4)
-ASYMM_RESCALE_IMPL(8)
-ASYMM_RESCALE_IMPL(16)
-
-#endif // ARM_COMPUTE_HELPERS_ASYMM_H \ No newline at end of file
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
deleted file mode 100644
index e3aa463db..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE)
-/** Performs a negation of input tensor.
- *
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- *
- * @param[in] in_ptr Pointer to the source image. Supported data types: S16/S32/F16/F32.
- * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
- * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
- */
-__kernel void neg_tensor(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VSTORE(VEC_SIZE)
- (-VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr), 0, (__global DATA_TYPE *)output.ptr);
-}
-#endif // defined(DATA_TYPE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
deleted file mode 100644
index ecf4696e9..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pad.cl
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && defined(ZERO_VALUE)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- * @attention Input dimensions should be passed as a preprocessor argument using -DIW(width), -DIH(height), -DID(depth) and -DIB(batch). e.g. -DIW = 4
- * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p inpu
-t_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in
-bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- *
- * @param[in] pad_values Padding values for each of the dimensions. Only pad values for Up(for
- * batch), Top(for height), Left(for width) and Front(for depth) are
- * required. Supported data type: S32
- */
-
-__kernel void pad(
- TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- const int4 pad_values)
- {
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int index[4]={0};
-
- index[0] = get_global_id(0);//W
- index[1] = get_global_id(1);//H
- index[2] = get_global_id(2) % DEPTH_OUT;//C
- index[3] = get_global_id(2) / DEPTH_OUT;//N
-
- if (index[0] < pad_values.x || index[0] >= (IW + pad_values.x) ||
- index[1] < pad_values.y || index[1] >= (IH + pad_values.y) ||
- index[2] < pad_values.z || index[2] >= (ID + pad_values.z) ||
- index[3] < pad_values.w || index[3] >= (IB + pad_values.w))
- {
- *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE;
- }
- else
- {
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)
- tensor4D_offset(&in, index[0] - pad_values.x,
- index[1] - pad_values.y,
- index[2] - pad_values.z,
- index[3] - pad_values.w));
- }
- }
-
-#endif //if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && defined(ZERO_VALUE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl
deleted file mode 100644
index 7cc8b0354..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && defined(P4)
-/** Perform a Generic permute operation on an input tensor of Shape DCHW.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16
- * @attention Permutation vector is passed as a preprocessor arguement using -DP1, -DP2, -DP3 and -DP4=int, e.g. -DP1=2
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U1
-6/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in b
-ytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in b
-ytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in b
-ytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p inpu
-t_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in
-bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void permute_generic(
- TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- int out_index[4];
- int in_index[4];
- in_index[0] = get_global_id(0);//W
- in_index[1] = get_global_id(1);//H
- in_index[2] = get_global_id(2) % DEPTH_IN;//C
- in_index[3] = get_global_id(2) / DEPTH_IN;//B
- out_index[0] = in_index[P1];
- out_index[1] = in_index[P2];
- out_index[2] = in_index[P3];
- out_index[3] = in_index[P4];
-
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0],out_index[1],out_index[2],out_index[3])) = *((__global DATA_TYPE *)in.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && defined(P4)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl
deleted file mode 100644
index aa05121b1..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifdef SATURATE
-#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x))
-#else /* SATURATE */
-#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x))
-#endif /* SATURATE */
-#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round)
-
-/** Performs a pixelwise division with float scale of either integer or float inputs.
- *
- * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short
- * @attention The data type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES.
- * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short.
- * @attention -DDATA_TYPE_FLOAT must be passed if floating point inputs are provided.
- *
- * @param[in] in1_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
- * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
- * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] scale Float scaling factor. Supported data types: F32
- */
-__kernel void pixelwise_div_float(
- TENSOR3D_DECLARATION(in1),
- TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out),
- const float scale)
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
-
- // Perform division
-#ifdef DATA_TYPE_FLOAT
- VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
- res = CONVERT(in1_data / in2_data * (DATA_TYPE_RES)scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
-#else /* DATA_TYPE_FLOAT */
- VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
- res = CONVERT_OP_FLOAT(CONVERT_OP_FLOAT((convert_float16(in1_data / in2_data) * scale), VEC_DATA_TYPE(DATA_TYPE_RES, 16), ROUND), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), ROUND);
-#endif /* DATA_TYPE_FLOAT */
-
- // Store result
- vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl
deleted file mode 100644
index fdfb78003..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(SATURATE)
-#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size##_sat(x))
-#else // SATURATE
-#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size(x))
-#endif // SATURATE
-#define CONVERT_OP_INT(x, type, size) CONVERT_OP_INT_STR(x, type, size)
-
-#define DIV_OP(x, y, scale, type, size) CONVERT_OP_INT((x) / (y) >> scale, type, size)
-
-/** Performs a pixelwise division with integer scale of integer inputs.
- *
- * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short
- * @attention The data_type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES.
- * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short.
- *
- * @param[in] in1_ptr Pointer to the source image. Supported data types: U8/S16
- * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr
- * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] scale Integer scaling factor. Supported data types: S32
- */
-__kernel void pixelwise_div_int(
- TENSOR3D_DECLARATION(in1),
- TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out),
- const uint scale)
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
-
- // Perform division and store result
- vstore16(DIV_OP(in1_data, in2_data, scale, DATA_TYPE_OUT, 16), 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
deleted file mode 100644
index ab1307e64..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers_asymm.h"
-
-#ifdef SATURATE
-#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x))
-#else /* SATURATE */
-#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x))
-#endif /* SATURATE */
-#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round)
-
-#if defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
-/** Performs a pixelwise multiplication used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
- *
- * The following computations will be performed by the kernel:
- *
- * -# Add offset terms to inputs
- * -# Multiply inputs
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- *
- * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The offset factor of inputs must be passed at compile time using -DIN1_OFFSET and -DIN2_OFFSET
- * @attention The offset, scalar scale factor and number of bits to shift right of output tensor must be passed at compile time using -DRESULT_OFFSET, -RESULT_MULT_INT and -DRESULT_SHIFT
- *
- * @param[in] in1_ptr Pointer to the source image. Supported data types: U8
- * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: U8
- * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] scale Float scaling factor. Supported data types: F32
- */
-__kernel void pixelwise_mul_qasymm8(
- TENSOR3D_DECLARATION(in1),
- TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out),
- const float scale)
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(int, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16));
- VEC_DATA_TYPE(int, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16));
-
- // Perform multiplication of two inputs
- VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET);
- VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET);
- VEC_DATA_TYPE(int, 16) out_val = in1_val * in2_val;
-
- // Multiply with a multiplier smaller than 1
- out_val = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(out_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
- out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
-
- VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16));
-
-// TODO: Apply min-max BOUND to support fuse with relu.
-/*
-#if defined(MIN_BOUND)
- res = max(res, (uchar16)MIN_BOUND);
-#endif // defined(MIN_BOUND)
-#if defined(MAX_BOUND)
- res = min(res, (uchar16)MAX_BOUND);
-#endif // defined(MAX_BOUND)
-*/
-
- // Store result
- VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)),
- 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
-#endif // defined(RESULT_OFFSET) && defined(RESULT_MULT_INT) && defined(RESULT_SHIFT)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
deleted file mode 100644
index 68da2ba32..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE)
-/** Returns result of prelu function implemented as below:
- * f(input) = alpha * input for input < 0, f(input) = input for input >= 0.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @note Can only take floating point data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported Data types : F16/F32
- * @param[in] input1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source image
- *
- * @param[in] alpha_ptr Pointer to the source image. Supported Data types : F16/F32
- * @param[in] alpha_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] alpha_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] alpha_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] alpha_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] alpha_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source image
- *
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void prelu(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(alpha),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VSTORE(VEC_SIZE)
- (VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) < 0 ?
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) * VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)alpha.ptr) :
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr),
- 0, (__global DATA_TYPE *)output.ptr);
-
-}
-#endif // defined(DATA_TYPE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
deleted file mode 100644
index 7e97b7ed6..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-#define SUB(x, y) (x) - (y)
-
-#if defined(OFF_IN1) && defined(OFF_IN2) && defined(OFF_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(VEC_SIZE)
-
-#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
-#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
-#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
-#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
-
-/** Returns result of prelu function implemented as below:
- * f(input) = alpha * input for input < 0, f(input) = input for input >= 0.
- *
- * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g. -DDATA_TYPE_IN=uchar
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @note Can only take uchar data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported Data types : QASYMM8
- * @param[in] input1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source image
- *
- * @param[in] alpha_ptr Pointer to the source image. Supported Data types : QASYMM8
- * @param[in] alpha_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] alpha_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] alpha_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] alpha_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] alpha_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source image
- *
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void prelu_qasymm8(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(alpha),
- TENSOR3D_DECLARATION(output))
-{
- // Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)input.ptr), VEC_INT);
- VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)alpha.ptr), VEC_INT);
-
- in_a = SUB(in_a, (VEC_INT)((int)OFF_IN1));
- in_b = SUB(in_b, (VEC_INT)((int)OFF_IN2));
-
- const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
- const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
- const VEC_FLOAT outf32 = in1f32 < 0 ? in1f32 * in2f32 : in1f32;
- const VEC_FLOAT qresf32 = outf32 / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFF_OUT));
- const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
-
- VSTORE(VEC_SIZE)
- (res, 0, (__global uchar *)output.ptr);
-}
-
-#endif // defined(OFF_IN1) && defined(OFF_IN2) && defined(OFF_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) && defined(VEC_SIZE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
deleted file mode 100644
index 8bef49363..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
-/** Perform reduce max/min
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using
- * -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] axis Axis through which reduction occurs
- * @param[in] dim Dimension across the axis to be reduced.
- */
-__kernel void reduce_min_max(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- const int axis,
- const int dim)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int indices[4] =
- {
- get_global_id(0),
- get_global_id(1),
- get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
- };
-
- DATA_TYPE value = *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
- for(int i = 1; i < dim; ++i)
- {
- indices[axis] = i;
-
- #if OP_CODE == 1 // REDUCE_MAX
- value = max(value, *((__global DATA_TYPE *)
- tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])));
-
- #elif OP_CODE == 2 // REDUCE_MIN
- value = min(value, *((__global DATA_TYPE *)
- tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3])));
-
- #else // OP NOT SUPPORTED
- return;
-
- #endif
- }
-
- *((__global DATA_TYPE *)out.ptr) = value;
-}
-
-/** Perform reduce sum/mean
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- * @attention Operation type(code) specifying which operation to perform should be passed as preprocessor argument using
- * -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] input_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] axis Axis through which reduction occurs
- * @param[in] dim Dimension across the axis to be reduced.
- */
-__kernel void reduce_sum_mean(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- const int axis,
- const int dim)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int indices[4] =
- {
- get_global_id(0),
- get_global_id(1),
- get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
- };
-
- DATA_TYPE sum_value = (DATA_TYPE)0;
- for(int i = 0; i < dim; ++i)
- {
- indices[axis] = i;
- sum_value += *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
- }
-
- #if OP_CODE == 3 // REDUCE_SUM
- *((__global DATA_TYPE *)out.ptr) = sum_value;
-
- #elif OP_CODE == 4 // REDUCE_MEAN
- *((__global DATA_TYPE *)out.ptr) = sum_value / CONVERT(dim, DATA_TYPE);
-
- #else // OP NOT SUPPORTED
- return;
-
- #endif
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
deleted file mode 100644
index a0fc2d5a9..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE)
-/** Perform space to batch with input of 4D and NCHW format
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- * @attention Input tensor batch should be given as a preprocessor argument using -DBATCH_IN=size. e.g. -DBATCH_IN=16
- * @attention Input tensor height should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DHEIGHT_IN=16
- * @attention Input tensor width should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DWIDTH_IN=16
- * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] block_size_ptr Pointer to the source tensor. Supported data types: S32
- * @param[in] block_size_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] block_size_step_x block_size_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] block_size_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] padding_size_ptr Pointer to the source tensor. Supported data types: S32
- * @param[in] padding_size_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] padding_size_step_x padding_size_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] padding_size_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] padding_size_step_y padding_size_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] padding_size_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void space_to_batch_4d_nchw(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- VECTOR_DECLARATION(block_size),
- IMAGE_DECLARATION(padding_size))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int block_size_x = *((__global int *)(block_size_ptr));
- int block_size_y = *((__global int *)(block_size_ptr + block_size_stride_x));
- int shift_x = (get_global_id(2) / DEPTH_OUT / BATCH_IN) % block_size_x;
- int shift_y = (get_global_id(2) / DEPTH_OUT / BATCH_IN) / block_size_x;
-
- int in_index[4] = {0, };
- in_index[0] = get_global_id(0) * block_size_x + shift_x - *((__global int *)(padding_size_ptr));
- in_index[1] = get_global_id(1) * block_size_y + shift_y - *((__global int *)(padding_size_ptr + padding_size_stride_y));
- in_index[2] = get_global_id(2) % DEPTH_OUT;
- in_index[3] = (get_global_id(2) / DEPTH_OUT) % BATCH_IN;
-
- if (in_index[0] < 0 || in_index[0] >= WIDTH_IN || in_index[1] < 0 || in_index[1] >= HEIGHT_IN)
- {
- *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE;
- }
- else
- {
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3]));
- }
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE)
-
-#if defined(DATA_TYPE) && defined(HEIGHT_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE) && defined(VEC_SIZE)
-/** Perform space to batch with input of 4D and NHWC format
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DHEIGHT_OUT=size. e.g. -DHEIGHT_OUT=16
- * @attention Input tensor batch should be given as a preprocessor argument using -DBATCH_IN=size. e.g. -DBATCH_IN=16
- * @attention Input tensor height should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DHEIGHT_IN=16
- * @attention Input tensor width should be given as a preprocessor argument using -DHEIGHT_IN=size. e.g. -DWIDTH_IN=16
- * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] block_size_ptr Pointer to the source tensor. Supported data types: S32
- * @param[in] block_size_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] block_size_step_x block_size_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] block_size_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] padding_size_ptr Pointer to the source tensor. Supported data types: S32
- * @param[in] padding_size_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] padding_size_step_x padding_size_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] padding_size_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] padding_size_step_y padding_size_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] padding_size_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void space_to_batch_4d_nhwc(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- VECTOR_DECLARATION(block_size),
- IMAGE_DECLARATION(padding_size))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, HEIGHT_OUT);
-
- int block_size_x = *((__global int *)(block_size_ptr));
- int block_size_y = *((__global int *)(block_size_ptr + block_size_stride_x));
- int shift_x = (get_global_id(2) / HEIGHT_OUT / BATCH_IN) % block_size_x;
- int shift_y = (get_global_id(2) / HEIGHT_OUT / BATCH_IN) / block_size_x;
-
- int in_index[4] = {0, };
- in_index[0] = get_global_id(0) * VEC_SIZE;
- in_index[1] = get_global_id(1) * block_size_x + shift_x - *((__global int *)(padding_size_ptr));
- in_index[2] = get_global_id(2) % HEIGHT_OUT * block_size_y + shift_y - *((__global int *)(padding_size_ptr + padding_size_stride_y));
- in_index[3] = (get_global_id(2) / HEIGHT_OUT) % BATCH_IN;
-
- if (in_index[1] < 0 || in_index[1] >= WIDTH_IN || in_index[2] < 0 || in_index[2] >= HEIGHT_IN)
- {
- VSTORE(VEC_SIZE)((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))ZERO_VALUE, 0, (__global DATA_TYPE *)out.ptr);
- }
- else
- {
- VSTORE(VEC_SIZE)(CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3])),
- VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
- 0, (__global DATA_TYPE *)out.ptr);
- }
-}
-
-#endif // defined(DATA_TYPE) && defined(HEIGHT_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && defined(WIDTH_IN) && defined(ZERO_VALUE) && defined(VEC_SIZE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
deleted file mode 100644
index f6977045a..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size. e.g. -DDEPTH_IN=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g. -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p inpu
-t_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in
-bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void space_to_depth(
- TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output))
- {
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- int out_index[4]={0};
- int in_index[4]={0};
-
- in_index[0] = get_global_id(0);//W
- in_index[1] = get_global_id(1);//H
- in_index[2] = get_global_id(2) % DEPTH_IN;//C
- in_index[3] = get_global_id(2) / DEPTH_IN;//B
-
- out_index[0] = in_index[0]/BLOCK_SIZE;
- out_index[1] = in_index[1]/BLOCK_SIZE;
- out_index[2] = in_index[2] + ((in_index[1] % BLOCK_SIZE) * BLOCK_SIZE + in_index[0] % BLOCK_SIZE) * DEPTH_IN;
- out_index[3] = in_index[3];
-
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0],out_index[1],out_index[2],out_index[3])) = *((__global DATA_TYPE *)in.ptr);
- }
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl
deleted file mode 100644
index 3e1a5c97f..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE)
-/** Returns true value of squared_difference of two tensors.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @note Can only take floating point data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported data types: F16/F32
- * @param[in] input1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source image
- *
- * @param[in] input2_ptr Pointer to the source image. Supported data types: F16/F32
- * @param[in] input2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source image
- *
- * @param[out] output_ptr Pointer to the destination image. Supported data types: F16/F32
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void squared_difference(
- TENSOR3D_DECLARATION(input1),
- TENSOR3D_DECLARATION(input2),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D input1 = CONVERT_TO_TENSOR3D_STRUCT(input1);
- Tensor3D input2 = CONVERT_TO_TENSOR3D_STRUCT(input2);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- diff = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr)- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr);
-
- VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- sq_diff = diff * diff;
-
- VSTORE(VEC_SIZE)
- (sq_diff, 0, (__global DATA_TYPE *)output.ptr);
-}
-#endif // defined(DATA_TYPE)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl
deleted file mode 100644
index b39c55b96..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT)
-/** Extracts a strided slice up to 4-dimensions
- *
- * @note Datatype should be given as a preprocessor argument using -DELEMENT_DATA_TYPE=type. e.g. -DELEMENT_DATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] starts The stride of X dimension of input tensor to be sliced. Supported data types: S32
- * @param[in] strides The stride of Y dimension of input tensor to be sliced. Supported data types: S32
- */
-__kernel void strided_slice_ex(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- const int4 starts,
- const int4 strides)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int4 indices_in =
- {
- starts.x + (strides.x * get_global_id(0)),
- starts.y + (strides.y * get_global_id(1)),
- starts.z + (strides.z * (get_global_id(2) % DEPTH_OUT)),
- starts.w + (strides.w * (get_global_id(2) / DEPTH_OUT)),
- };
- *((__global ELEMENT_DATA_TYPE *)out.ptr) = *((__global ELEMENT_DATA_TYPE *)tensor4D_offset(&in, indices_in.x, indices_in.y, indices_in.z, indices_in.w));
-}
-#endif // defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT)
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
deleted file mode 100644
index d97f23a47..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "helpers.h"
-
-__kernel void topkv2_init(VECTOR_DECLARATION(input),
- __global float* in_key_buf,
- __global int* in_ind_buf,
- const int n)
-{
- int gid = get_global_id(0);
- int lws = get_local_size(0);
- int groups = get_num_groups(0);
- int gws = lws * groups;
- int iter = n / gws;
-
- Vector input = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input);
-
- for(int i = 0; i < iter; ++i)
- {
- int idx = i * gws + gid;
- in_key_buf[idx] = *(__global float*)(input.ptr + idx * input.stride_x);
- in_ind_buf[idx] = idx;
- }
-}
-
-__kernel void topkv2_find_first_negative(
- __global float *out_key_buf,
- __global int *first_negative_idx,
- int n)
-{
- int gid = get_global_id(0);
-
- if( gid == n - 1 )
- {
- // if the last item is positive, the first negative index is n.
- if( out_key_buf[gid] > 0.f )
- *first_negative_idx = n;
- } else if ( gid == 0 ) {
- // if the first item is negative, set it 0.
- if( out_key_buf[gid] < 0.f )
- *first_negative_idx = 0;
- } else {
- // if its left is positive and it is negative, then it is the first negative item.
- if( out_key_buf[gid-1] > 0.f && out_key_buf[gid] < 0.f )
- *first_negative_idx = gid;
- }
-}
-
-__kernel void topkv2_reorder_negatives(
- __global float* in_key_buf,
- __global float* out_key_buf,
- __global float* in_ind_buf,
- __global float* out_ind_buf,
- __global int* first_negative_idx,
- int n)
-{
- int gid = get_global_id(0);
-
- int num_negs = n - *first_negative_idx;
- int in_idx;
-
- if( gid < num_negs ) {
- in_idx = n - 1 - gid;
- } else {
- in_idx = gid - num_negs;
- }
-
- out_key_buf[gid] = in_key_buf[in_idx];
- out_ind_buf[gid] = in_ind_buf[in_idx];
-}
-
-__kernel void topkv2_store(
- VECTOR_DECLARATION(values),
- VECTOR_DECLARATION(indices),
- __global float *out_key_buf,
- __global int *out_ind_buf,
- int n)
-{
- int gid = get_global_id(0);
-
- Vector values = CONVERT_TO_VECTOR_STRUCT_NO_STEP(values);
- Vector indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(indices);
-
- int idx = n - 1 - gid;
-
- *(__global float*)(values.ptr + gid * values.stride_x) = out_key_buf[idx];
- *(__global int*)(indices.ptr + gid * indices.stride_x) = out_ind_buf[idx];
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
deleted file mode 100644
index 0292fab04..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "helpers.h"
-
-__global inline float* get_vec_elem(Vector* vec, int idx)
-{
- return (__global float*)(vec->ptr + idx * vec->stride_x);
-}
-
-__global inline int* get_vec_elem_int(Vector* vec, int idx)
-{
- return (__global int*)(vec->ptr + idx * vec->stride_x);
-}
-
-// A utility function to swap two elements
-void swap(__global float *a, __global float *b)
-{
- float t = *a;
- *a = *b;
- *b = t;
-}
-
-void swap_idx(__global int *a, __global int *b)
-{
- int t = *a;
- *a = *b;
- *b = t;
-}
-
-/* This function is same in both iterative and recursive*/
-int partition (Vector* arr, __global int* indices, int l, int h)
-{
- float x = *get_vec_elem(arr, h);
- int i = (l - 1);
-
- for (int j = l; j <= h- 1; j++)
- {
- if (*get_vec_elem(arr, j) >= x)
- {
- i++;
- swap (get_vec_elem(arr,i), get_vec_elem(arr,j));
- swap_idx(&indices[i], &indices[j]);
- }
- }
- swap (get_vec_elem(arr, i + 1), get_vec_elem(arr, h));
- swap_idx(&indices[i + 1], &indices[h]);
- return (i + 1);
-}
-
-/* A[] --> Array to be sorted,
- l --> Starting index,
- h --> Ending index */
-void quickSortIterative (Vector* arr, __global int* indices,
- __global int *stack, int l, int h)
-{
- // Create an auxiliary stack
-
- // initialize top of stack
- int top = -1;
-
- // push initial values of l and h to stack
- stack[ ++top ] = l;
- stack[ ++top ] = h;
-
- // Keep popping from stack while is not empty
- while ( top >= 0 )
- {
- // Pop h and l
- h = stack[ top-- ];
- l = stack[ top-- ];
-
- // Set pivot element at its correct position
- // in sorted array
- int p = partition( arr, indices, l, h );
-
- // If there are elements on left side of pivot,
- // then push left side to stack
- if ( p-1 > l )
- {
- stack[ ++top ] = l;
- stack[ ++top ] = p - 1;
- }
-
- // If there are elements on right side of pivot,
- // then push right side to stack
- if ( p+1 < h )
- {
- stack[ ++top ] = p + 1;
- stack[ ++top ] = h;
- }
- }
-}
-
-__kernel void topkv2_quicksort(VECTOR_DECLARATION(input),
- VECTOR_DECLARATION(topk_values), VECTOR_DECLARATION(topk_indices),
- __global int* indices, __global int* temp_stack, int k, int n)
-{
- Vector input = CONVERT_TO_VECTOR_STRUCT_NO_STEP(input);
- Vector topk_values = CONVERT_TO_VECTOR_STRUCT_NO_STEP(topk_values);
- Vector topk_indices = CONVERT_TO_VECTOR_STRUCT_NO_STEP(topk_indices);
-
- for( int i = 0; i < n; ++i )
- {
- indices[i] = i;
- }
-
- quickSortIterative(&input, indices, temp_stack, 0, n-1);
-
- // extract k items.
- for(int i = 0; i < k; ++i)
- {
- *get_vec_elem(&topk_values, i) = *get_vec_elem(&input, i);
- *get_vec_elem_int(&topk_indices, i) = indices[i];
- }
-}
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
deleted file mode 100644
index c2c2d89a4..000000000
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// reference:
-// https://code.google.com/archive/p/ocl-radix-sort/source/default/source
-// OpenCL kernel sources for the CLRadixSort class
-// the #include does not exist in OpenCL
-// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr
-// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html
-// if you find this software usefull you can cite the following work in your reports or articles:
-// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011.
-// http://hal.archives-ouvertes.fr/hal-00596730
-
-// Reference for floating point radix sort:
-// http://www.codercorner.com/RadixSortRevisited.htm
-
-// compute the histogram for each radix and each virtual processor for the pass
-__kernel void radixsort_histogram(__global float* in_key_buf,
- __global int* d_Histograms,
- const int pass,
- __local int* loc_histo,
- const int n)
-{
- int it = get_local_id(0); // i local number of the processor
- int ig = get_global_id(0); // global number = i + g I
-
- int gr = get_group_id(0); // g group number
-
- int groups = get_num_groups(0);
- int items = get_local_size(0);
-
- // set the local histograms to zero
- for(int ir=0;ir<_RADIX;ir++){
- loc_histo[ir * items + it] = 0;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // range of keys that are analyzed by the work item
- int size= n/groups/items; // size of the sub-list
- int start= ig * size; // beginning of the sub-list
-
- unsigned int key;
- int shortkey,k;
-
- // compute the index
- // the computation depends on the transposition
- for(int j = 0; j < size ; j++) {
-#ifdef TRANSPOSE
- k= groups * items * j + ig;
-#else
- k=j+start;
-#endif
-
- key = *((__global unsigned int*)(in_key_buf + k));
-
- // extract the group of _BITS bits of the pass
- // the result is in the range 0.._RADIX-1
- shortkey=(( key >> (pass * _BITS)) & (_RADIX-1));
-
- // increment the local histogram
- loc_histo[shortkey * items + it ]++;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // copy the local histogram to the global one
- for(int ir=0;ir<_RADIX;ir++) {
- d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it];
- }
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// initial transpose of the list for improving
-// coalescent memory access
-__kernel void transpose(const __global int* invect,
- __global int* outvect,
- const int nbcol,
- const int nbrow,
- const __global int* inperm,
- __global int* outperm,
- __local int* blockmat,
- __local int* blockperm,
- const int tilesize){
-
- int i0 = get_global_id(0)*tilesize; // first row index
- int j = get_global_id(1); // column index
-
- int jloc = get_local_id(1); // local column index
-
- // fill the cache
- for(int iloc=0;iloc<tilesize;iloc++){
- int k=(i0+iloc)*nbcol+j; // position in the matrix
- blockmat[iloc*tilesize+jloc]=invect[k];
-#ifdef PERMUT
- blockperm[iloc*tilesize+jloc]=inperm[k];
-#endif
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // first row index in the transpose
- int j0=get_group_id(1)*tilesize;
-
- // put the cache at the good place
- for(int iloc=0;iloc<tilesize;iloc++){
- int kt=(j0+iloc)*nbrow+i0+jloc; // position in the transpose
- outvect[kt]=blockmat[jloc*tilesize+iloc];
-#ifdef PERMUT
- outperm[kt]=blockperm[jloc*tilesize+iloc];
-#endif
- }
-
-}
-
-// each virtual processor reorders its data using the scanned histogram
-__kernel void radixsort_reorder(__global float* in_key,
- __global float* out_key,
- __global int* d_Histograms,
- const int pass,
- __global int* indices_in,
- __global int* indices_out,
- __local int* loc_histo,
- const int n){
-
- int it = get_local_id(0);
- int ig = get_global_id(0);
-
- int gr = get_group_id(0);
- int groups=get_num_groups(0);
- int items=get_local_size(0);
-
- int start= ig *(n/groups/items);
- int size= n/groups/items;
-
- // take the histogram in the cache
- for(int ir=0;ir<_RADIX;ir++){
- loc_histo[ir * items + it]=
- d_Histograms[items * (ir * groups + gr) + it];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int newpos,shortkey,k,newpost;
- unsigned int key;
-
- for(int j= 0; j< size;j++){
-#ifdef TRANSPOSE
- k= groups * items * j + ig;
-#else
- k=j+start;
-#endif
- float org_value = in_key[k];
- key = *(__global unsigned int*)(in_key + k);
- shortkey=((key >> (pass * _BITS)) & (_RADIX-1));
-
- newpos=loc_histo[shortkey * items + it];
-
-#ifdef TRANSPOSE
- int ignew,jnew;
- ignew= newpos/(n/groups/items);
- jnew = newpos%(n/groups/items);
- newpost = jnew * (groups*items) + ignew;
-#else
- newpost=newpos;
-#endif
-
- //d_outKeys[newpost]= key; // killing line !!!
- out_key[newpost] = org_value;
-
-#ifdef PERMUT
- indices_out[newpost] = indices_in[k];
-#endif
-
- newpos++;
- loc_histo[shortkey * items + it]=newpos;
- }
-}
-
-// perform a parallel prefix sum (a scan) on the local histograms
-// (see Blelloch 1990) each workitem worries about two memories
-// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html
-__kernel void radixsort_scanhistograms(__global int* histo, __local int* temp, __global int* globsum)
-{
- int it = get_local_id(0);
- int ig = get_global_id(0);
- int decale = 1;
- int n=get_local_size(0) * 2 ;
- int gr=get_group_id(0);
-
- // load input into local memory
- // up sweep phase
- temp[2*it] = histo[2*ig];
- temp[2*it+1] = histo[2*ig+1];
-
- // parallel prefix sum (algorithm of Blelloch 1990)
- for (int d = n>>1; d > 0; d >>= 1){
- barrier(CLK_LOCAL_MEM_FENCE);
- if (it < d){
- int ai = decale*(2*it+1)-1;
- int bi = decale*(2*it+2)-1;
- temp[bi] += temp[ai];
- }
- decale *= 2;
- }
-
- // store the last element in the global sum vector
- // (maybe used in the next step for constructing the global scan)
- // clear the last element
- if (it == 0) {
- globsum[gr]=temp[n-1];
- temp[n - 1] = 0;
- }
-
- // down sweep phase
- for (int d = 1; d < n; d *= 2){
- decale >>= 1;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (it < d){
- int ai = decale*(2*it+1)-1;
- int bi = decale*(2*it+2)-1;
-
- int t = temp[ai];
- temp[ai] = temp[bi];
- temp[bi] += t;
- }
-
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // write results to device memory
-
- histo[2*ig] = temp[2*it];
- histo[2*ig+1] = temp[2*it+1];
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-
-}
-
-// use the global sum for updating the local histograms
-// each work item updates two values
-__kernel void radixsort_pastehistograms( __global int* histo,__global int* globsum)
-{
- int ig = get_global_id(0);
- int gr=get_group_id(0);
-
- int s;
-
- s=globsum[gr];
-
- // write results to device memory
- histo[2*ig] += s;
- histo[2*ig+1] += s;
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp
deleted file mode 100644
index 1fdd2f98f..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLActivationLayerExKernel.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/UtilsEx.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfoEx &act_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::F16, DataType::F32);
-
- // Checks performed when output is configured
- if ((output != nullptr) && (output->total_size() != 0))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- if (output != nullptr)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output, *input);
- }
-
- const unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
-
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- bool window_changed = false;
-
- if (output != nullptr)
- {
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->valid_region());
- }
- else
- {
- window_changed = update_window_and_padding(
- win, AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration));
- }
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLActivationLayerExKernel::CLActivationLayerExKernel()
- : _input(nullptr), _output(nullptr), _run_in_place(false)
-{
-}
-
-void CLActivationLayerExKernel::configure(ICLTensor *input, ICLTensor *output,
- ActivationLayerInfoEx act_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- _run_in_place = (output == nullptr) || (output == input);
-
- if (output != nullptr)
- {
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), *input->info()->clone());
- }
-
- ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, act_info));
-
- const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
- const DataType dt = input->info()->data_type();
- float a_const = act_info.a();
- float b_const = act_info.b();
- int a_const_int = 0;
- int b_const_int = 0;
-
- // Create quantized version of constants a, b if needed
- if (is_data_type_quantized(dt))
- {
- a_const_int =
- input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP);
- b_const_int =
- input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP);
- }
-
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(
- ("-DACT=" + lower_string(string_from_activation_func_ex(act_info.activation()))));
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- if (is_data_type_quantized(dt))
- {
- build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int)));
- build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
-
- const int o1 = input->info()->quantization_info().offset;
- // Quantized value of 0 corresponds to the offset o1
- build_opts.emplace(("-DCONST_0=" + support::cpp11::to_string(o1)));
-
- // Set scale and offset of the input and output if they have different quantization info
- if (is_data_type_quantized_asymmetric(dt) && output != nullptr)
- {
- const float s1 = input->info()->quantization_info().scale;
- const float s2 = output->info()->quantization_info().scale;
- const int o2 = output->info()->quantization_info().offset;
-
- if (o1 != o2 || s1 != s2)
- {
- build_opts.emplace(("-DS1_VAL=" + float_to_string_with_full_precision(s1)));
- build_opts.emplace(("-DS2_VAL=" + float_to_string_with_full_precision(s2)));
- build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1)));
- build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2)));
- }
- }
- }
- else
- {
- build_opts.emplace(("-DA_VAL=" + float_to_string_with_full_precision(a_const)));
- build_opts.emplace(("-DB_VAL=" + float_to_string_with_full_precision(b_const)));
- }
-
- build_opts.emplace((_run_in_place) ? "-DIN_PLACE" : "");
-
- // Create kernel
- std::string kernel_name = std::string("activation_layer_ex");
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Make sure _kernel is initialized before calling the parent's configure
- _input = input;
- _output = output;
-
- // Configure kernel window
- auto win_config =
- validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-
- // Set config_id for enabling LWS tuning
- _config_id = "activation_layer_ex_";
- _config_id += lower_string(string_from_data_type(dt));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-Status CLActivationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfoEx &act_info)
-{
- const bool run_in_place = (output == nullptr) || (output == input);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(),
- (run_in_place) ? nullptr : output->clone().get())
- .first);
-
- return Status{};
-}
-
-void CLActivationLayerExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- if (!_run_in_place)
- {
- add_3D_tensor_argument(idx, _output, slice);
- }
- enqueue(queue, *this, slice, lws_hint());
- } while (collapsed.slide_window_slice_3D(slice));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp
deleted file mode 100644
index c1a2ad0be..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLArgMinMaxKernel.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t argminmax_axis)
-{
- TensorShape out_shape{input_shape};
-
- out_shape.set(argminmax_axis, 1);
-
- return out_shape;
-}
-} // namespace
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t argminmax_axis, ArgOperation op)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32, DataType::F32,
- DataType::U8);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(input, output);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0,
- "Inputs are not broadcast compatible");
-
- const TensorShape output_shape = inferOutputShape(input->tensor_shape(), argminmax_axis);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(),
- "output shape's size does not match argminmax_axis");
-
- const auto num_dimensions = input->tensor_shape().num_dimensions();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- argminmax_axis >= 0 && argminmax_axis < num_dimensions,
- "argminmax_axis must be greater than or equal to 0 and less than (input's rank).");
- return Status{};
-}
-
-} // namespace
-
-CLArgMinMaxKernel::CLArgMinMaxKernel() : _input(nullptr), _output(nullptr), _argminmax_axis() {}
-
-void CLArgMinMaxKernel::configure(const ICLTensor *input, ICLTensor *output,
- const uint32_t argminmax_axis, ArgOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), argminmax_axis));
-
- _input = input;
- _output = output;
- _argminmax_axis = argminmax_axis;
-
- std::unique_ptr<ITensorInfo> output_info = output->info()->clone();
- output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), argminmax_axis));
-
- // Construct kernel name for argmax and argmin based on axis
- std::string kernel_name = "arg_op";
- int op_code = 0;
- if (op == ArgOperation::MAX)
- {
- op_code = 1;
- }
- else if (op == ArgOperation::MIN)
- {
- op_code = 2;
- }
- else
- throw std::runtime_error("Operation not supported, yet");
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(output_info->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2)));
- build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output_info, Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output_info->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLArgMinMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t argminmax_axis, ArgOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, argminmax_axis, op));
-
- return Status{};
-}
-
-void CLArgMinMaxKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &shape_in = _input->info()->tensor_shape();
-
- unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters
-
- _kernel.setArg<cl_int>(idx++, _argminmax_axis);
- _kernel.setArg<cl_int>(idx++, shape_in[_argminmax_axis]);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- // Copy output's shape in order to use for recovering at end of this method
- const TensorShape shape_out = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(inferOutputShape(shape_in, _argminmax_axis));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-
- // Recover output's shape of output tensor
- _output->info()->set_tensor_shape(shape_out);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp
deleted file mode 100644
index 1c505b4d5..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLArithmeticSubtractionExKernel.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, ConvertPolicy policy)
-{
- ARM_COMPUTE_UNUSED(policy);
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
-
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
-
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->data_type() == DataType::U8 &&
- (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
- "Output can only be U8 if both inputs are U8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2,
- ITensorInfo *output)
-{
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output, out_shape);
-
- if (input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16)
- {
- set_format_if_unknown(*output, Format::S16);
- }
- else if (input1->data_type() == DataType::F16 && input2->data_type() == DataType::F16)
- {
- set_format_if_unknown(*output, Format::F16);
- }
- else if (input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output, Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input1);
- Window win_input2 = win.broadcast_if_dimension_le_one(*input2);
-
- AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLArithmeticSubtractionExKernel::CLArithmeticSubtractionExKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void CLArithmeticSubtractionExKernel::configure(const ICLTensor *input1, const ICLTensor *input2,
- ICLTensor *output, ConvertPolicy policy)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input1->info(), input2->info(), output->info(), policy));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- const bool has_float_out = is_data_type_float(output->info()->data_type());
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace((policy == ConvertPolicy::WRAP || has_float_out) ? "-DWRAP" : "-DSATURATE");
- build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("arithmetic_sub_ex", build_opts));
-
- ICLKernel::configure_internal(win_config.second);
-}
-
-Status CLArithmeticSubtractionExKernel::validate(const ITensorInfo *input1,
- const ITensorInfo *input2,
- const ITensorInfo *output, ConvertPolicy policy)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, policy));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(),
- input2->clone().get(),
- output->clone().get())
- .first);
-
- return Status{};
-}
-
-void CLArithmeticSubtractionExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input1->info()->tensor_shape();
- const TensorShape &in_shape2 = _input2->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
-
- add_3D_tensor_argument(idx, _input1, slice_input1);
- add_3D_tensor_argument(idx, _input2, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLArithmeticSubtractionExKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp
deleted file mode 100644
index b0016d23c..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t *block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size[0] >= 1 && block_size[1] >= 1,
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) == output->dimension(2),
- "Input Depth should be equal to Output Depth");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->dimension(3) * block_size[0] * block_size[1] == input->dimension(3),
- "Input batch should be equal to (output batch * block size[0] *block size[1])");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(output->dimension(0) % block_size[1]) &&
- !(output->dimension(1) % block_size[0]),
- "Output height and width should be divisible by block size[0] "
- "and block_size[1] respectively");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) == input->dimension(0) * block_size[1]) &&
- (output->dimension(1) == input->dimension(1) * block_size[0]),
- "Output height and width should be equal to "
- "input_height*blocksize[0] and input_width*blocksize[1] "
- "respectively");
-
- return Status{};
-}
-
-} // namespace
-
-CLBatchToSpaceNDKernel::CLBatchToSpaceNDKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLBatchToSpaceNDKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t *block_size)
-{
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size));
-
- _input = input;
- _output = output;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE0=" + support::cpp11::to_string(block_size[0]));
- build_opts.emplace("-DBLOCK_SIZE1=" + support::cpp11::to_string(block_size[1]));
- build_opts.emplace("-DBATCH_OUT=" + support::cpp11::to_string(output->info()->dimension(3)));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("batch_to_space_nd", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLBatchToSpaceNDKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_out(slice_in);
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_out.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_out);
- add_4D_tensor_argument(idx, _output, slice_in);
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
deleted file mode 100644
index 3d2f2c702..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_parameters(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output)
-{
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8,
- DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
- return Status{};
-}
-} // namespace
-
-CLBinaryLogicalOpKernel::CLBinaryLogicalOpKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void CLBinaryLogicalOpKernel::configure(const ICLTensor *input1, const ICLTensor *input2,
- ICLTensor *output, BinaryLogicalOperation op)
-{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_parameters(input1->info(), input2->info(), output->info()));
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "binary_logical_op";
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())));
-
- int op_code = 0;
- switch (op)
- {
- case BinaryLogicalOperation::AND:
- op_code = 1;
- break;
- case BinaryLogicalOperation::OR:
- op_code = 2;
- break;
- default:
- throw std::runtime_error("Operation not supported, yet");
- }
-
- build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code)));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
-
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info());
- Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info());
-
- AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLBinaryLogicalOpKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input1->info()->tensor_shape();
- const TensorShape &in_shape2 = _input2->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input1, slice_input1);
- add_3D_tensor_argument(idx, _input2, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLBinaryLogicalOpKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
deleted file mode 100644
index bf7ebae3f..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-CLCastKernel::CLCastKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
- _input = input;
- _output = output;
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- // Create kernel
- if (is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- const float scale_in = input->info()->quantization_info().scale;
- const int offset_in = input->info()->quantization_info().offset;
- build_opts.emplace("-DSCALE=" + float_to_string_with_full_precision(scale_in));
- build_opts.emplace("-DOFFSET=" + support::cpp11::to_string(offset_in));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast_qasymm_in", build_opts));
- }
- else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
- {
- const float scale_in = output->info()->quantization_info().scale;
- const int offset_in = output->info()->quantization_info().offset;
- build_opts.emplace("-DSCALE=" + float_to_string_with_full_precision(scale_in));
- build_opts.emplace("-DOFFSET=" + support::cpp11::to_string(offset_in));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast_qasymm_out", build_opts));
- }
- else
- {
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("cast", build_opts));
- }
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLCastKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- } while (collapsed.slide_window_slice_3D(slice));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp
deleted file mode 100644
index 5af5b16ea..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output)
-{
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::U16,
- DataType::S16, DataType::F16, DataType::S32,
- DataType::F32, DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::U16,
- DataType::S16, DataType::F16, DataType::S32,
- DataType::F32, DataType::QASYMM8);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
- return Status{};
-}
-} // namespace
-
-CLComparisonOpKernel::CLComparisonOpKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void CLComparisonOpKernel::configure(const ICLTensor *input1, const ICLTensor *input2,
- ICLTensor *output, const ComparisonOperation &op)
-{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info()));
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "comparison_op";
- int op_code = 0;
-
- switch (op)
- {
- case ComparisonOperation::EQUAL:
- op_code = 1;
- break;
- case ComparisonOperation::NOT_EQUAL:
- op_code = 2;
- break;
- default:
- throw std::runtime_error(" Operation not supported, yet");
- }
-
- std::set<std::string> build_opts;
- build_opts.emplace(("-DOP_CODE=" + support::cpp11::to_string(op_code)));
- build_opts.emplace(("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input1->info()->data_type())));
- build_opts.emplace(
- ("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- if (is_data_type_quantized_asymmetric(input1->info()->data_type()) &&
- ((input1->info()->quantization_info().offset != input2->info()->quantization_info().offset) ||
- (input1->info()->quantization_info().scale != input2->info()->quantization_info().scale)))
- {
- build_opts.emplace("-DOFFSET_IN1=" +
- support::cpp11::to_string(input1->info()->quantization_info().offset));
- build_opts.emplace("-DOFFSET_IN2=" +
- support::cpp11::to_string(input2->info()->quantization_info().offset));
- build_opts.emplace("-DSCALE_IN1=" +
- support::cpp11::to_string(input1->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_IN2=" +
- support::cpp11::to_string(input2->info()->quantization_info().scale));
- kernel_name += "_qasymm8";
- }
-
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
-
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output->info(), out_shape);
-
- if (input1->info()->data_type() == DataType::S16 ||
- input2->info()->data_type() == DataType::S16)
- {
- set_format_if_unknown(*output->info(), Format::S16);
- }
- else if (input1->info()->data_type() == DataType::F16 &&
- input2->info()->data_type() == DataType::F16)
- {
- set_format_if_unknown(*output->info(), Format::F16);
- }
- else if (input1->info()->data_type() == DataType::F32 ||
- input2->info()->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output->info(), Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info());
- Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info());
-
- AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLComparisonOpKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input1->info()->tensor_shape();
- const TensorShape &in_shape2 = _input2->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input1, slice_input1);
- add_3D_tensor_argument(idx, _input2, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLComparisonOpKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
deleted file mode 100644
index c386e3312..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size >= 1,
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(0) == input->dimension(0) * block_size,
- "Output width should be equal to (Input width * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(1) == input->dimension(1) * block_size,
- "Output height should be equal to (Input height * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) % (block_size * block_size) == 0,
- "Input depth should be divisible by (block size * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->dimension(2) == input->dimension(2) / (block_size * block_size),
- "Output depth should be equal to (Input depth / (block size * block size))");
-
- return Status{};
-}
-} // namespace
-
-CLDepthToSpaceKernel::CLDepthToSpaceKernel() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CLDepthToSpaceKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t block_size)
-{
-
- _input = input;
- _output = output;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("depth_to_space", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLDepthToSpaceKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
deleted file mode 100644
index 0862b78bf..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
- input_access.set_valid_region(win, output->valid_region());
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLEmbeddingLookupKernel::CLEmbeddingLookupKernel()
- : _input(nullptr), _output(nullptr), _lookups(nullptr)
-{
-}
-
-Status CLEmbeddingLookupKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *lookups)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, lookups);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4);
- ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1);
-
- return Status{};
-}
-
-void CLEmbeddingLookupKernel::configure(const ICLTensor *input, ICLTensor *output,
- const ICLTensor *lookups)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info()));
-
- _input = input;
- _output = output;
- _lookups = lookups;
-
- // Set kernel build options
- std::stringstream kernel_name;
- std::set<std::string> build_opts;
- kernel_name << "embedding_lookup";
-
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-}
-
-void CLEmbeddingLookupKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- Window win_lookup;
- win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_in);
- add_1D_tensor_argument(idx, _lookups, win_lookup);
-
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_1D(win_lookup));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp
deleted file mode 100644
index b1ee21bdc..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLExpKernel.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLExpKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-CLExpKernel::CLExpKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLExpKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Auto initialize output
- auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(),
- input->info()->quantization_info());
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
-
- constexpr unsigned int num_elems_processed_per_iteration = 4;
-
- // Create kernel
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("exp_layer", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLExpKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- } while (collapsed.slide_window_slice_3D(slice));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp
deleted file mode 100644
index ae2801e2b..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLGatherKernel.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 1;
-
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S32,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S32,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
-
- return Status{};
-}
-
-} // namespace
-
-CLGatherKernel::CLGatherKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr) {}
-
-void CLGatherKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info()));
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- // Construct kernel name
- std::string kernel_name = "gather";
- if (input1->info()->num_dimensions() == 1)
- {
- kernel_name = "gather_1d";
- }
- else if (input1->info()->num_dimensions() == 2)
- {
- if (_output->info()->num_dimensions() == 1)
- {
- kernel_name = "gather_1d_out";
- }
- }
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input2->info(), Steps(num_elems_processed_per_iteration));
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLGatherKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output));
-
- return Status{};
-}
-
-void CLGatherKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- if (_input1->info()->num_dimensions() == 1)
- {
- Window slice = window.first_slice_window_1D();
-
- unsigned int idx = 0;
- add_1D_tensor_argument(idx, _input1, slice);
- add_1D_tensor_argument(idx, _input2, slice);
- add_1D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- }
- else if (_input1->info()->num_dimensions() == 2)
- {
- Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimY);
- Window slice = window.collapse_if_possible(ICLKernel::window(), Window::DimX);
-
- // Set inputs
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input1, window_collapsed);
- add_1D_tensor_argument(idx, _input2, slice);
- if (_output->info()->num_dimensions() == 1)
- {
- add_1D_tensor_argument(idx, _output, slice);
- }
- else
- {
- add_2D_tensor_argument(idx, _output, window_collapsed);
- }
- enqueue(queue, *this, slice);
- }
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
deleted file mode 100644
index cd7b21c6d..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
- input_access.set_valid_region(win, output->valid_region());
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLHashtableLookupKernel::CLHashtableLookupKernel()
- : _input(nullptr), _output(nullptr), _lookups(nullptr)
-{
-}
-
-Status CLHashtableLookupKernel::validate(const ITensorInfo *lookups, const ITensorInfo *keys,
- const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *hits)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(lookups, keys, input, output, hits);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lookups, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keys, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(hits, 1, DataType::U8, DataType::QASYMM8);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0,
- "Output's shape was not set");
-
- ARM_COMPUTE_ERROR_ON(lookups->dimensions(0) == hits->dimensions(0) &&
- output->dimension(output->num_dimensions() - 1) == lookups->dimension(0));
- ARM_COMPUTE_ERROR_ON(input->num_dimensions() < 2 && input->num_dimensions() > 4);
- ARM_COMPUTE_ERROR_ON(lookups->num_dimensions() > 1);
- ARM_COMPUTE_ERROR_ON(keys->num_dimensions() > 1);
- ARM_COMPUTE_ERROR_ON(hits->num_dimensions() > 1);
-
- return Status{};
-}
-
-void CLHashtableLookupKernel::configure(const ICLTensor *lookups, const ICLTensor *keys,
- const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), lookups->info()));
-
- _lookups = lookups;
- _keys = keys;
- _input = input;
- _output = output;
- _hits = hits;
-
- // Make _lookup_indices tensor
- _lookup_indices = arm_compute::support::cpp14::make_unique<CLTensor>();
- _lookup_indices->allocator()->init(
- TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
- _lookup_indices->allocator()->allocate();
-
- // Set kernel build options
- std::stringstream kernel_name;
- std::set<std::string> build_opts;
- kernel_name << "hashtable_lookup";
-
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.emplace("-DNUM_DIMS=" + support::cpp11::to_string(_input->info()->num_dimensions()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name.str(), build_opts));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-}
-
-void CLHashtableLookupKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const_cast<ICLTensor *>(_lookups)->map(queue);
- const_cast<ICLTensor *>(_keys)->map(queue);
- _hits->map(queue);
- _lookup_indices->map(queue);
-
- // Set values of hits
- const int32_t *lookups_buf =
- reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_lookups)->buffer());
- const int32_t *keys_buf = reinterpret_cast<int32_t *>(const_cast<ICLTensor *>(_keys)->buffer());
- uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer());
- int32_t *lookup_indices_buf = reinterpret_cast<int32_t *>(_lookup_indices->buffer());
-
- std::map<int32_t, size_t> key_map;
- const size_t keys_num = _keys->info()->dimension(0);
- for (size_t key_index = 0; key_index < keys_num; key_index++)
- {
- key_map[keys_buf[key_index]] = key_index;
- }
-
- const size_t lookups_num = _lookups->info()->dimension(0);
- for (size_t i = 0; i < lookups_num; ++i)
- {
- const auto lookup_value = lookups_buf[i];
- const auto it = key_map.find(lookup_value);
- if (it != key_map.end())
- {
-#if defined(DEBUG)
- if (it->second >= lookups_num)
- ARM_COMPUTE_ERROR("HashTable Lookup: index out of bounds.");
-#endif // defined(DEBUG)
- lookup_indices_buf[i] = static_cast<int32_t>(it->second);
- hits_buf[i] = static_cast<uint8_t>(1);
- }
- else
- {
- lookup_indices_buf[i] = -1;
- hits_buf[i] = static_cast<uint8_t>(0);
- }
- }
-
- const_cast<ICLTensor *>(_lookups)->unmap(queue);
- const_cast<ICLTensor *>(_keys)->unmap(queue);
- _hits->unmap(queue);
- _lookup_indices->unmap(queue);
-
- Window win = window.collapse(ICLKernel::window(), 2, 4);
-
- Window win_lookup;
- win_lookup.set(Window::DimX, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, win);
- add_4D_tensor_argument(idx, _output, win);
- add_1D_tensor_argument(idx, _lookup_indices.get(), win_lookup);
-
- enqueue(queue, *this, win);
- } while (window.slide_window_slice_4D(win) && window.slide_window_slice_1D(win_lookup));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
deleted file mode 100644
index 80d99dd3b..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLNegKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S16, DataType::S32,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S16, DataType::S32,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(input->info()->tensor_shape(),
- output->info()->tensor_shape());
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- return Status{};
-}
-
-} // namespace
-
-CLNegKernel::CLNegKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLNegKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- // Create kernel
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("neg_tensor", build_opts));
-
- // Configure window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLNegKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- } while (collapsed.slide_window_slice_3D(slice));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp
deleted file mode 100644
index 12bbe910f..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-
- // Checks performed when output is configured
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
- NormalizationLayerInfo norm_info)
-{
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, *input->clone());
-
- const unsigned int norm_size = norm_info.norm_size();
- bool is_in_map = norm_info.is_in_map();
-
- const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
- const BorderSize border_size = BorderSize(0, border_width);
-
- const unsigned int num_elems_processed_per_iteration = 4;
- const unsigned int num_elems_read_per_iteration =
- is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2))
- : num_elems_processed_per_iteration;
-
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
- // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside
- // the kernel, avoiding padding
- AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->valid_region());
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLNormalizationLayerExKernel::CLNormalizationLayerExKernel()
- : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false)
-{
-}
-
-BorderSize CLNormalizationLayerExKernel::border_size() const { return _border_size; }
-
-void CLNormalizationLayerExKernel::configure(const ICLTensor *input, ICLTensor *output,
- NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info()->clone());
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info));
-
- _input = input;
- _output = output;
-
- const unsigned int num_elems_processed_per_iteration = 4;
- const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
-
- // Set build options
- CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.add_option(
- ("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
- build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
- build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
- build_opts.add_option(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size())));
- build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
- build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
-
- // Create kernel
- std::string kernel_name =
- _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map";
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-
- // Set config_id for enabling LWS tuning
- _config_id = "normalization_layer_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(
- static_cast<std::underlying_type<NormType>::type>(norm_info.type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(norm_info.norm_size());
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-Status CLNormalizationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
-
- return Status{};
-}
-
-void CLNormalizationLayerExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const int collapsed_dimension = _is_in_map ? Window::DimZ : 4;
- Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension);
- Window slice = window_collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- } while (window_collapsed.slide_window_slice_3D(slice));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
deleted file mode 100644
index 241f8ae4d..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_info(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input->tensor_shape(), alpha->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(alpha, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
- return Status{};
-}
-} // namespace
-
-CLPReLUKernel::CLPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {}
-
-void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, alpha);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), alpha->info(), output->info()));
-
- _input = input;
- _alpha = alpha;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "prelu";
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- if (is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- build_opts.emplace("-DOFF_IN1=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_IN2=" +
- support::cpp11::to_string(alpha->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().offset));
- build_opts.emplace("-DSCALE_IN1=" +
- support::cpp11::to_string(input->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_IN2=" +
- support::cpp11::to_string(alpha->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().scale));
- kernel_name += "_qasymm8";
- }
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info());
-
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output->info(), out_shape);
-
- if (input->info()->data_type() == DataType::F16 && alpha->info()->data_type() == DataType::F16)
- {
- set_format_if_unknown(*output->info(), Format::F16);
- }
- else if (input->info()->data_type() == DataType::F32 ||
- alpha->info()->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output->info(), Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input->info());
- Window win_input2 = win.broadcast_if_dimension_le_one(*alpha->info());
-
- AccessWindowHorizontal input1_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(alpha->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLPReLUKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input->info()->tensor_shape();
- const TensorShape &in_shape2 = _alpha->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_input1);
- add_3D_tensor_argument(idx, _alpha, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLPReLUKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input->info()->dimension(0), _alpha->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
deleted file mode 100644
index 99b54c822..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input_info, const ITensorInfo *output_info,
- const ITensorInfo *pad_size_info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_info, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_info, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(pad_size_info, 1, DataType::S32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->num_dimensions() > 0 &&
- input_info->num_dimensions() <= 4,
- "Pad kernel supports upto 4-D input tensor");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- input_info->num_dimensions() == output_info->num_dimensions(),
- "output tensor should have same number of dimensions as input tensor");
-
- if (input_info->data_type() == DataType::QASYMM8)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->quantization_info() !=
- output_info->quantization_info(),
- "The input and output quantization info are different!");
- }
-
- return Status{};
-}
-
-} // namespace
-
-CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _pad_size(nullptr) {}
-
-void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, pad_size);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pad_size->info()));
-
- _input = input;
- _output = output;
- _pad_size = pad_size;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.emplace("-DIB=" + support::cpp11::to_string(input->info()->dimension(3)));
- build_opts.emplace("-DIW=" + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.emplace("-DIH=" + support::cpp11::to_string(input->info()->dimension(1)));
- build_opts.emplace("-DID=" + support::cpp11::to_string(input->info()->dimension(2)));
- if (input->info()->data_type() == DataType::QASYMM8)
- {
- build_opts.emplace("-DZERO_VALUE=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
- }
- else
- {
- build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0));
- }
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("pad", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- _pad_size->map(queue);
-
- // Padding values only for up, top, left and front are required based on the rank of tensor
- int rank = _pad_size->info()->dimension(1);
-
- auto pad_batch_up =
- (rank == 4) ? *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 0})) : 0;
- auto pad_height_top =
- (rank >= 2)
- ? *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, (rank == 2) ? 0 : 1}))
- : 0;
- auto pad_width_left = (rank >= 1)
- ? *reinterpret_cast<const int32_t *>(
- _pad_size->ptr_to_element({0, (rank == 4) ? 2 : rank - 1}))
- : 0;
- auto pad_depth_front =
- (rank >= 3)
- ? *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, (rank == 3) ? 0 : 3}))
- : 0;
-
- _pad_size->unmap(queue);
-
- // Pad_values which needs to be passed
- const cl_int4 paddingValues = {
- {static_cast<cl_int>(pad_width_left), static_cast<cl_int>(pad_height_top),
- static_cast<cl_int>(pad_depth_front), static_cast<cl_int>(pad_batch_up)}};
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- _kernel.setArg<cl_int4>(idx++, paddingValues);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp
deleted file mode 100644
index aa094761c..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLPermuteExKernel.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-using namespace arm_compute;
-
-namespace
-{
-TensorShape get_output_shape(const ITensorInfo *input, const PermutationVector &perm)
-{
- TensorShape output_shape = input->tensor_shape();
- permute(output_shape, perm);
- return output_shape;
-}
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
-
- const TensorShape output_shape =
- misc::shape_calculator::compute_permutation_output_shape(*input, perm);
-
- // Validate configured output
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
- return Status{};
-}
-} // namespace
-
-CLPermuteExKernel::CLPermuteExKernel() : _input(nullptr), _output(nullptr), _perm() {}
-
-void CLPermuteExKernel::configure(const ICLTensor *input, ICLTensor *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm));
-
- _input = input;
- _output = output;
- _perm = perm;
-
- const TensorShape output_shape = get_output_shape(input->info(), perm);
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
-
- // Create kernel
- std::set<std::string> build_opts;
-
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2)));
-
- // New positions of batch(D), height(H), width(w) and channel(C) based on permutation vector
- build_opts.emplace("-DP1=" + support::cpp11::to_string(perm[0]));
- build_opts.emplace("-DP2=" + support::cpp11::to_string(perm[1]));
- build_opts.emplace("-DP3=" + support::cpp11::to_string(perm[2]));
- build_opts.emplace("-DP4=" + support::cpp11::to_string(perm[3]));
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("permute_generic", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- // The CLPermute doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLPermuteExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, perm));
-
- return Status{};
-}
-
-void CLPermuteExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_out(slice_in);
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_out.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp
deleted file mode 100644
index b985aa737..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale, ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- ARM_COMPUTE_UNUSED(overflow_policy);
- ARM_COMPUTE_UNUSED(rounding_policy);
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative.");
-
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
-
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->data_type() == DataType::U8 &&
- (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
- "Output can only be U8 if both inputs are U8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2,
- ITensorInfo *output)
-{
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output, out_shape);
-
- if (input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16)
- {
- set_format_if_unknown(*output, Format::S16);
- }
- else if (input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output, Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input1);
- Window win_input2 = win.broadcast_if_dimension_le_one(*input2);
-
- AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLPixelWiseDivisionKernel::CLPixelWiseDivisionKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void CLPixelWiseDivisionKernel::configure(const ICLTensor *input1, const ICLTensor *input2,
- ICLTensor *output, float scale,
- ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(),
- scale, overflow_policy, rounding_policy));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- int scale_int = -1;
- // Extract sign, exponent and mantissa
- int exponent = 0;
- float normalized_mantissa = std::frexp(scale, &exponent);
- // Use int scaling if factor is equal to 1/2^n for 0 <= n <= 15
- // frexp returns 0.5 as mantissa which means that the exponent will be in the range of -1 <= e <=
- // 14
- // Moreover, it will be negative as we deal with 1/2^n
- if ((normalized_mantissa == 0.5f) && (-14 <= exponent) && (exponent <= 1))
- {
- // Store the positive exponent. We know that we compute 1/2^n
- // Additionally we need to subtract 1 to compensate that frexp used a mantissa of 0.5
- scale_int = std::abs(exponent - 1);
- }
-
- std::string data_type;
- std::string compute_type;
- // Check if it has float inputs and output
- if (is_data_type_float(input1->info()->data_type()) ||
- is_data_type_float(input2->info()->data_type()))
- {
- scale_int = -1;
- compute_type = (input1->info()->data_type() == DataType::F32 ||
- input2->info()->data_type() == DataType::F32)
- ? "float"
- : "half";
- data_type = "DATA_TYPE_FLOAT";
- }
- else
- {
- if (input1->info()->data_type() == DataType::S16 ||
- input2->info()->data_type() == DataType::S16)
- {
- compute_type = "int";
- }
- else
- {
- compute_type = "ushort";
- }
- data_type = "DATA_TYPE_INT";
- }
-
- // Construct kernel name
- std::string kernel_name = "pixelwise_div";
- kernel_name += (scale_int >= 0) ? "_int" : "_float";
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace(
- (overflow_policy == ConvertPolicy::WRAP || is_data_type_float(output->info()->data_type()))
- ? "-DWRAP"
- : "-DSATURATE");
- build_opts.emplace((rounding_policy == RoundingPolicy::TO_ZERO) ? "-DROUND=_rtz"
- : "-DROUND=_rte");
- build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_RES=" + compute_type);
- build_opts.emplace("-D" + data_type);
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Set scale argument
- unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the inputs and output parameters
-
- if (scale_int >= 0)
- {
- _kernel.setArg(idx++, scale_int);
- }
- else
- {
- _kernel.setArg(idx++, scale);
- }
-
- ICLKernel::configure_internal(win_config.second);
-}
-
-Status CLPixelWiseDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(),
- input2->clone().get(),
- output->clone().get())
- .first);
-
- return Status{};
-}
-
-void CLPixelWiseDivisionKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input1->info()->tensor_shape();
- const TensorShape &in_shape2 = _input2->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); ++d)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input1, slice_input1);
- add_3D_tensor_argument(idx, _input2, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLPixelWiseDivisionKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
deleted file mode 100644
index f581780e1..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-namespace
-{
-// NOTE This is necessary because it is not guaranteed that the axis positions of input and output
-// are the same.
-const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t axis)
-{
- TensorShape out_shape{input_shape};
-
- out_shape.set(axis, 1);
-
- return out_shape;
-}
-} // namespace
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ReduceOperation op)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32, DataType::S32);
- if (op == ReduceOperation::MEAN || op == ReduceOperation::SUM)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8,
- "Not support QASYMM8, yet");
- }
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0,
- "Inputs are not broadcast compatible");
-
- const auto num_dimensions = input->tensor_shape().num_dimensions();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- axis >= 0 && axis < num_dimensions,
- "axis must be greater than or equal to 0 and less than (input's rank).");
-
- const TensorShape output_shape = inferOutputShape(input->tensor_shape(), axis);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(),
- "output shape's size does not match axis");
-
- return Status{};
-}
-} // namespace
-
-CLReduceOperationKernel::CLReduceOperationKernel() : _input(nullptr), _output(nullptr), _axis() {}
-
-void CLReduceOperationKernel::configure(const ICLTensor *input, ICLTensor *output,
- const uint32_t axis, ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
-
- _input = input;
- _output = output;
- _axis = axis;
-
- std::unique_ptr<ITensorInfo> output_info = output->info()->clone();
- output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), axis));
-
- // Construct kernel name
- std::string kernel_name;
- int op_code = 0;
- if (op == ReduceOperation::MAX)
- {
- kernel_name = "reduce_min_max";
- op_code = 1;
- }
- else if (op == ReduceOperation::MIN)
- {
- kernel_name = "reduce_min_max";
- op_code = 2;
- }
- else if (op == ReduceOperation::SUM)
- {
- kernel_name = "reduce_sum_mean";
- op_code = 3;
- }
- else if (op == ReduceOperation::MEAN)
- {
- kernel_name = "reduce_sum_mean";
- op_code = 4;
- }
- else
- throw std::runtime_error("Operation not supported, yet");
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(output_info->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2)));
- build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output_info, Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output_info->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLReduceOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t axis, ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
-
- return Status{};
-}
-
-void CLReduceOperationKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &shape_in = _input->info()->tensor_shape();
-
- unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters
-
- _kernel.setArg<cl_int>(idx++, _axis);
- _kernel.setArg<cl_int>(idx++, shape_in[_axis]);
-
- // Support dimensions up to 4
- Window slice_out = window.collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- // Copy output's shape in order to use for recovering at end of this method
- // TODO Remove changing and recovering output's shape if it is guaranteed that the axis positions
- // of input and output are the same
- const TensorShape shape_out = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(inferOutputShape(shape_in, _axis));
-
- idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
-
- // Recover output's shape of output tensor
- _output->info()->set_tensor_shape(shape_out);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
deleted file mode 100644
index 6b0697e89..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-// Checks that the given tensor infos describe a configuration this kernel accepts.
-//
-// input        - source tensor info; must be 4-dimensional with NCHW or NHWC layout
-// block_size   - S32 tensor info; dimension 0 must be 2
-// padding_size - S32 tensor info; dimension 1 must be 2
-// output       - destination tensor info; same rank, layout and depth as the input
-//
-// Returns an empty Status on success, otherwise a Status describing the first violation.
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_size,
-                          const ITensorInfo *padding_size, const ITensorInfo *output)
-{
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
-                                                       DataType::S16, DataType::F16, DataType::S32,
-                                                       DataType::F32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_size, 1, DataType::S32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(padding_size, 1, DataType::S32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
-                                                       DataType::S16, DataType::F16, DataType::S32,
-                                                       DataType::F32);
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() != output->num_dimensions(),
-                                  "The number of dimensions of input should be equal to output");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() != output->data_layout(),
-                                  "The input and output layouts are different!");
-
-  // TODO Support other cases
-  // Only 4-dimensional NCHW/NHWC inputs are handled. This also makes a separate
-  // "dimensions up to 4" check unnecessary; the one that used to follow this section tested
-  // "< 2 && > 4", which can never be true.
-  const DataLayout layout = input->data_layout();
-  const bool is_supported = (input->num_dimensions() == 4) &&
-                            (layout == DataLayout::NCHW || layout == DataLayout::NHWC);
-  if (!is_supported)
-  {
-    ARM_COMPUTE_RETURN_ERROR_MSG("CLSpaceToBatchNDKernel supports only 4-dimensional input");
-  }
-
-  // The depth is compared on dimension 2 for NCHW and on dimension 0 for NHWC
-  const size_t depth_idx = (layout == DataLayout::NCHW) ? 2 : 0;
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(depth_idx) != output->dimension(depth_idx),
-                                  "Input Depth should be equal to Output Depth");
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size->dimension(0) != 2 ||
-                                      padding_size->dimension(1) != 2,
-                                  "Only 2-dimensional spatial blocks are supported");
-
-  if (input->data_type() == DataType::QASYMM8)
-  {
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->quantization_info() != output->quantization_info(),
-                                    "The input and output quantization info are different!");
-  }
-
-  return Status{};
-}
-
-} // namespace
-
-// Creates an unconfigured kernel; the tensor pointers are filled in by configure().
-CLSpaceToBatchNDKernel::CLSpaceToBatchNDKernel() : _input(nullptr), _output(nullptr)
-{
-  // configure() also stores the block-size and padding-size tensors. Clear them here as well
-  // so that no tensor pointer of a freshly constructed kernel is left indeterminate.
-  // (Assigned in the body rather than the initializer list because the member declaration
-  // order is not visible from this file.)
-  _block_size = nullptr;
-  _padding_size = nullptr;
-}
-
-// Validates the arguments, builds the layout-specific "space_to_batch_4d" OpenCL kernel and
-// derives the execution window from the output tensor.
-//
-// input        - source tensor (NCHW or NHWC)
-// block_size   - S32 tensor with the block sizes
-// padding_size - S32 tensor with the paddings
-// output       - destination tensor
-void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor *block_size,
-                                       const ICLTensor *padding_size, ICLTensor *output)
-{
-  // block_size and padding_size are dereferenced on the next statement, so they have to be
-  // part of the null check too
-  ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_size, padding_size, output);
-  ARM_COMPUTE_ERROR_THROW_ON(
-      validate_arguments(input->info(), block_size->info(), padding_size->info(), output->info()));
-
-  _input = input;
-  _block_size = block_size;
-  _padding_size = padding_size;
-  _output = output;
-
-  // Set kernel build options
-  // TODO Support other cases
-  std::string kernel_name = "space_to_batch_4d";
-  std::set<std::string> build_opts;
-  Window win;
-
-  if (input->info()->data_layout() == DataLayout::NCHW)
-  {
-    kernel_name += "_nchw";
-    build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-    build_opts.emplace("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(1)));
-    build_opts.emplace("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(0)));
-
-    win = calculate_max_window(*output->info(), Steps());
-
-    Coordinates coord;
-    coord.set_num_dimensions(output->info()->num_dimensions());
-    output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-  }
-  else if (input->info()->data_layout() == DataLayout::NHWC)
-  {
-    kernel_name += "_nhwc";
-    build_opts.emplace("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-    build_opts.emplace("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(2)));
-    build_opts.emplace("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(1)));
-    build_opts.emplace("-DVEC_SIZE=" +
-                       support::cpp11::to_string(num_elems_processed_per_iteration));
-
-    win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    bool window_changed = update_window_and_padding(win, input_access, output_access);
-    input_access.set_valid_region(win, output->info()->valid_region());
-
-    if (window_changed)
-    {
-      // The error Status used to be created and dropped on the floor; hand it to the
-      // error handler so the failure is actually reported.
-      ARM_COMPUTE_ERROR_THROW_ON(
-          ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!"));
-    }
-  }
-  else
-  {
-    ARM_COMPUTE_ERROR("Unsupported layout");
-  }
-
-  build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
-  build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3)));
-  // ZERO_VALUE is the quantization offset for QASYMM8 inputs and 0 for every other type
-  if (input->info()->data_type() == DataType::QASYMM8)
-  {
-    build_opts.emplace("-DZERO_VALUE=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
-  }
-  else
-  {
-    build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0));
-  }
-
-  // Create kernel
-  _kernel =
-      static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
-  // Configure kernel window
-  ICLKernel::configure_internal(win);
-}
-
-// Enqueues the kernel for every 4D slice of the window. In DEBUG builds the block-size and
-// padding tensors are mapped first and checked against the input/output shapes.
-void CLSpaceToBatchNDKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-  ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
-#if defined(DEBUG)
-  const_cast<ICLTensor *>(_block_size)->map(queue);
-  const_cast<ICLTensor *>(_padding_size)->map(queue);
-
-  const size_t num_dimensions = _input->info()->num_dimensions();
-  const size_t num_spacial_dimensions = _block_size->info()->dimension(0);
-  int32_t batch_size = _input->info()->dimension(num_dimensions - 1);
-  for (size_t i = 0; i < num_spacial_dimensions; ++i)
-  {
-    const int32_t block_size = *reinterpret_cast<int32_t *>(_block_size->ptr_to_element({i}));
-    const int32_t padding_size_pre =
-        *reinterpret_cast<int32_t *>(_padding_size->ptr_to_element({0, i}));
-    const int32_t padding_size_post =
-        *reinterpret_cast<int32_t *>(_padding_size->ptr_to_element({1, i}));
-
-    ARM_COMPUTE_ERROR_ON_MSG(block_size < 1, "Block size should be greater than or equal to 1");
-    // Either padding being negative is an error; "&&" only caught the case where both were
-    ARM_COMPUTE_ERROR_ON_MSG(padding_size_pre < 0 || padding_size_post < 0,
-                             "Padding size should be greater than or equal to 0");
-
-    if (num_dimensions == 4 && _input->info()->data_layout() == DataLayout::NCHW)
-    {
-      ARM_COMPUTE_ERROR_ON_MSG(
-          _output->info()->dimension(i) !=
-              (_input->info()->dimension(i) + padding_size_pre + padding_size_post) / block_size,
-          "Dimension value of spatial block does not match output's dimension value");
-    }
-    else
-    {
-      ARM_COMPUTE_ERROR_ON_MSG(
-          _output->info()->dimension(num_dimensions - num_spacial_dimensions - 1 + i) !=
-              (_input->info()->dimension(num_dimensions - num_spacial_dimensions - 1 + i) +
-               padding_size_pre + padding_size_post) /
-                  block_size,
-          "Dimension value of spatial block does not match output's dimension value");
-    }
-
-    batch_size *= block_size;
-  }
-  ARM_COMPUTE_ERROR_ON_MSG(
-      _output->info()->dimension(num_dimensions - 1) != batch_size,
-      "Output batch size should be equal to input batch size * (multiplication of all block size)");
-
-  const_cast<ICLTensor *>(_block_size)->unmap(queue);
-  const_cast<ICLTensor *>(_padding_size)->unmap(queue);
-#endif // defined(DEBUG)
-
-  Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
-  // Setup input slice: a copy of the output slice with its first four dimensions zeroed
-  Window slice_in(slice_out);
-  slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
-  slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
-  slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
-  slice_in.set(3, Window::Dimension(0, 0, 0));
-
-  // Set block size window
-  Window win_block = calculate_max_window(*_block_size->info(), Steps());
-
-  // Set padding size window
-  Window win_padding = calculate_max_window(*_padding_size->info(), Steps());
-
-  do
-  {
-    unsigned int idx = 0;
-    add_4D_tensor_argument(idx, _input, slice_in);
-    add_4D_tensor_argument(idx, _output, slice_out);
-    add_1D_tensor_argument(idx, _block_size, win_block);
-    add_2D_tensor_argument(idx, _padding_size, win_padding);
-    enqueue(queue, *this, slice_out);
-  } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
deleted file mode 100644
index 5d6329edc..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-// Checks that input, output and block size describe a valid space-to-depth configuration.
-//
-// Requirements checked, in order:
-//  - input and output use one of the accepted single-channel data types
-//  - block_size >= 1
-//  - dimension 3 (batch) is the same for input and output
-//  - output dimension 2 (depth) == input dimension 2 * block_size * block_size
-//  - input dimensions 0 and 1 are divisible by block_size
-//  - output dimensions 0 and 1 equal the matching input dimension divided by block_size
-//
-// Returns an empty Status on success, otherwise a Status describing the first violation.
-// Each macro below returns an error when its condition is TRUE, so every condition states
-// the violation (the previous conditions stated the valid case and rejected valid input).
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
-                          const int32_t block_size)
-{
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
-                                                       DataType::S16, DataType::S32, DataType::F16,
-                                                       DataType::F32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
-                                                       DataType::S16, DataType::S32, DataType::F16,
-                                                       DataType::F32);
-  // Must come before the divisions and modulo operations below
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1,
-                                  "Block size should be greater than or equal to 1.");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(3) != output->dimension(3),
-                                  "Input batch should be equal to Output batch");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(
-      input->dimension(2) * block_size * block_size != output->dimension(2),
-      "Output depth should be equal to (input depth * block size *block size)");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->dimension(0) % block_size) != 0 ||
-                                      (input->dimension(1) % block_size) != 0,
-                                  "Input height and width should be divisible by block size");
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != (input->dimension(0) / block_size)) ||
-                                      (output->dimension(1) != (input->dimension(1) / block_size)),
-                                  "Output height and width should be equal to "
-                                  "input_height/blocksize and input_width/blocksize respectively");
-
-  return Status{};
-}
-
-} // namespace
-
-// Creates an unconfigured kernel: both tensor pointers start out null.
-CLSpaceToDepthKernel::CLSpaceToDepthKernel()
-    : _input(nullptr),
-      _output(nullptr)
-{
-}
-
-// Validates the arguments, builds the "space_to_depth" OpenCL kernel and sets the execution
-// window from the input tensor.
-//
-// input      - source tensor
-// output     - destination tensor; its valid region is set to cover its whole shape
-// block_size - block size, passed to the kernel as the BLOCK_SIZE build option
-void CLSpaceToDepthKernel::configure(const ICLTensor *input, ICLTensor *output,
-                                     const int32_t block_size)
-{
-  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size));
-
-  _input = input;
-  _output = output;
-
-  // Assemble the build options
-  const std::string data_type_opt =
-      "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type());
-  const std::string block_size_opt = "-DBLOCK_SIZE=" + support::cpp11::to_string(block_size);
-  const std::string depth_in_opt =
-      "-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2));
-
-  std::set<std::string> kernel_opts;
-  kernel_opts.emplace(data_type_opt);
-  kernel_opts.emplace(block_size_opt);
-  kernel_opts.emplace(depth_in_opt);
-
-  // Build the kernel
-  _kernel =
-      static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("space_to_depth", kernel_opts));
-
-  // The execution window spans the input tensor
-  Window exec_window = calculate_max_window(*input->info(), Steps());
-
-  // The whole output shape is valid, starting at the origin
-  Coordinates origin;
-  origin.set_num_dimensions(output->info()->num_dimensions());
-  output->info()->set_valid_region(ValidRegion(origin, output->info()->tensor_shape()));
-
-  ICLKernel::configure_internal(exec_window);
-}
-
-// Enqueues the kernel once per 4D slice of the window; the enqueue range is the input slice.
-void CLSpaceToDepthKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-  ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
-  Window in_slice = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
-  // The output slice is a copy of the input slice with its first four dimensions zeroed
-  const Window::Dimension zero_dim(0, 0, 0);
-  Window out_slice(in_slice);
-  out_slice.set(Window::DimX, zero_dim);
-  out_slice.set(Window::DimY, zero_dim);
-  out_slice.set(Window::DimZ, zero_dim);
-  out_slice.set(3, zero_dim);
-
-  bool has_next_slice = true;
-  while (has_next_slice)
-  {
-    unsigned int arg_pos = 0;
-    add_4D_tensor_argument(arg_pos, _input, in_slice);
-    add_4D_tensor_argument(arg_pos, _output, out_slice);
-    enqueue(queue, *this, in_slice);
-
-    // Advance the input slice first; the output slice is only advanced if that succeeded
-    has_next_slice =
-        window.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice);
-  }
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp
deleted file mode 100644
index 260bc39f1..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLSquaredDifferenceKernel.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-// Checks the two inputs (F16/F32, broadcast compatible) and, when the output already has a
-// non-zero total size, its data type and shape against the broadcast shape.
-// Returns an empty Status on success, otherwise a Status describing the first violation.
-Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
-  const TensorShape broadcast =
-      TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16, DataType::F32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::F16, DataType::F32);
-
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(broadcast.total_size() == 0,
-                                  "Inputs are not broadcast compatible");
-
-  // An output with zero total size has not been configured yet: nothing more to check
-  if (output->total_size() == 0)
-  {
-    return Status{};
-  }
-
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
-  ARM_COMPUTE_RETURN_ERROR_ON_MSG(
-      detail::have_different_dimensions(broadcast, output->tensor_shape(), 0),
-      "Wrong shape for output");
-
-  return Status{};
-}
-} // namespace
-
-// Creates an unconfigured kernel: all three tensor pointers start out null.
-CLSquaredDifferenceKernel::CLSquaredDifferenceKernel()
-    : _input1(nullptr),
-      _input2(nullptr),
-      _output(nullptr)
-{
-}
-
-// Validates the tensors, builds the "squared_difference" OpenCL kernel, auto-initialises the
-// output if it is still empty and configures the execution window and padding.
-//
-// input1, input2 - source tensors (F16 or F32, broadcast compatible)
-// output         - destination tensor; shape/format are filled in when not yet set
-void CLSquaredDifferenceKernel::configure(const ICLTensor *input1, const ICLTensor *input2,
-                                          ICLTensor *output)
-{
-  ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
-  ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, output);
-  ARM_COMPUTE_ERROR_THROW_ON(validate(input1->info(), input2->info(), output->info()));
-
-  _input1 = input1;
-  _input2 = input2;
-  _output = output;
-
-  // Create kernel
-  std::set<std::string> build_opts;
-  build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())));
-  build_opts.emplace(
-      ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-  _kernel = static_cast<cl::Kernel>(
-      CLKernelLibraryEx::get().create_kernel("squared_difference", build_opts));
-
-  const std::pair<TensorShape, ValidRegion> broadcast_pair =
-      ITensorInfo::broadcast_shape_and_valid_region(*input1->info(), *input2->info());
-
-  const TensorShape &out_shape = broadcast_pair.first;
-  const ValidRegion &valid_region = broadcast_pair.second;
-
-  // Auto initialize output if not initialized
-  {
-    set_shape_if_empty(*output->info(), out_shape);
-
-    if (input1->info()->data_type() == DataType::F16 &&
-        input2->info()->data_type() == DataType::F16)
-    {
-      set_format_if_unknown(*output->info(), Format::F16);
-    }
-    else if (input1->info()->data_type() == DataType::F32 ||
-             input2->info()->data_type() == DataType::F32)
-    {
-      set_format_if_unknown(*output->info(), Format::F32);
-    }
-  }
-
-  Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
-  Window win_input1 = win.broadcast_if_dimension_le_one(*input1->info());
-  Window win_input2 = win.broadcast_if_dimension_le_one(*input2->info());
-
-  AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration);
-  AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration);
-  AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-  const bool window_changed = update_window_and_padding(win_input1, input1_access) ||
-                              update_window_and_padding(win_input2, input2_access) ||
-                              update_window_and_padding(win, output_access);
-
-  output_access.set_valid_region(win, valid_region);
-
-  // The result of the padding update used to be ignored; report it instead of continuing
-  // with a window the tensors cannot accommodate.
-  if (window_changed)
-  {
-    ARM_COMPUTE_ERROR_THROW_ON(
-        ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!"));
-  }
-
-  ICLKernel::configure_internal(win);
-}
-
-// Enqueues the kernel once per 3D slice, collapsing the window from DimZ upwards when the
-// input shapes allow it.
-void CLSquaredDifferenceKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
-  const TensorShape &shape_a = _input1->info()->tensor_shape();
-  const TensorShape &shape_b = _input2->info()->tensor_shape();
-  const TensorShape &shape_out = _output->info()->tensor_shape();
-
-  // Collapsing stays enabled when the smaller input has at most one element. Otherwise both
-  // inputs must have more than DimZ dimensions and agree on every dimension from DimZ up to
-  // the output's number of dimensions.
-  bool collapsible = true;
-  if (std::min(shape_a.total_size(), shape_b.total_size()) > 1)
-  {
-    collapsible = (std::min(shape_a.num_dimensions(), shape_b.num_dimensions()) > Window::DimZ);
-
-    size_t dim = Window::DimZ;
-    while (collapsible && (dim < shape_out.num_dimensions()))
-    {
-      collapsible = (shape_a[dim] == shape_b[dim]);
-      dim++;
-    }
-  }
-
-  bool did_collapse = false;
-  Window exec_window =
-      collapsible ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &did_collapse)
-                  : window;
-
-  // When the window was collapsed, the input shapes are collapsed from the same dimension
-  const TensorShape &shape_a_used =
-      did_collapse ? shape_a.collapsed_from(Window::DimZ) : shape_a;
-  const TensorShape &shape_b_used =
-      did_collapse ? shape_b.collapsed_from(Window::DimZ) : shape_b;
-
-  Window out_slice = exec_window.first_slice_window_3D();
-  Window a_slice = out_slice.broadcast_if_dimension_le_one(shape_a_used);
-  Window b_slice = out_slice.broadcast_if_dimension_le_one(shape_b_used);
-
-  bool has_next_slice = true;
-  while (has_next_slice)
-  {
-    unsigned int arg_pos = 0;
-    add_3D_tensor_argument(arg_pos, _input1, a_slice);
-    add_3D_tensor_argument(arg_pos, _input2, b_slice);
-    add_3D_tensor_argument(arg_pos, _output, out_slice);
-
-    enqueue(queue, *this, out_slice);
-
-    // Both input slices are advanced on every iteration; only the output slice decides
-    // whether another iteration follows
-    exec_window.slide_window_slice_3D(a_slice);
-    exec_window.slide_window_slice_3D(b_slice);
-    has_next_slice = exec_window.slide_window_slice_3D(out_slice);
-  }
-}
-
-// Returns a border whose only non-zero entry is the second constructor argument. Its value is
-// the difference between the output's dimension 0 and the smaller input's dimension 0,
-// capped at num_elems_processed_per_iteration - 1.
-BorderSize CLSquaredDifferenceKernel::border_size() const
-{
-  const unsigned int width_gap =
-      _output->info()->dimension(0) -
-      std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
-  const unsigned int max_border = num_elems_processed_per_iteration - 1U;
-
-  return BorderSize(0, std::min<unsigned int>(max_border, width_gap), 0, 0);
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp
deleted file mode 100644
index 48146a43a..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/TensorInfo.h"
-
-using namespace arm_compute;
-
-// Creates an unconfigured kernel: all tensor pointers are null and all masks are zero.
-CLStridedSliceExKernel::CLStridedSliceExKernel()
-    : _input(nullptr),
-      _output(nullptr),
-      _beginData(nullptr),
-      _endData(nullptr),
-      _stridesData(nullptr),
-      _beginMask(0),
-      _endMask(0),
-      _shrinkAxisMask(0)
-{
-}
-
-// Static validation entry point for the strided-slice kernel.
-//
-// input, output       - tensor infos; must share the same data type
-// begin, end, strides - S32 tensor infos; begin must be 1-dimensional with at most 4
-//                       elements, and all three must have matching dimensions
-// beginMask, endMask, shrinkAxisMask - not inspected here
-//
-// Returns an empty Status on success, otherwise a Status describing the first violation.
-// Every check uses the RETURN flavour of its macro so that failures are reported through the
-// returned Status, which is what a validate() function is for.
-Status CLStridedSliceExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
-                                        const ITensorInfo *begin, const ITensorInfo *end,
-                                        const ITensorInfo *strides, int32_t beginMask,
-                                        int32_t endMask, int32_t shrinkAxisMask)
-{
-  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, begin, end, strides);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
-      input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
-      DataType::U32, DataType::S32, DataType::F16, DataType::F32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(begin, 1, DataType::S32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(end, 1, DataType::S32);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(strides, 1, DataType::S32);
-  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-  ARM_COMPUTE_RETURN_ERROR_ON(begin->num_dimensions() != 1 || begin->dimension(0) > 4);
-  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(begin->tensor_shape(), end->tensor_shape(),
-                                                     strides->tensor_shape());
-
-  return Status{};
-}
-
-// Computes the index of the first element to read along `axis`.
-// When bit `axis` of beginMask is set, `begin` is ignored and the extreme value for the
-// iteration direction is used instead. A negative index is offset by the axis size, and the
-// result is clamped to [0, axisSize - 1].
-inline int32_t StartForAxis(int32_t beginMask, int32_t begin, int32_t stride,
-                            const TensorShape &inputShape, int32_t axis)
-{
-  const bool begin_is_masked = (beginMask & (1 << axis)) != 0;
-
-  int32_t first = begin;
-  if (begin_is_masked)
-  {
-    // Forward iteration starts from the lowest value, backward iteration from the highest;
-    // both are brought into range by the clamp below (lowest()/max() are used instead of
-    // 0/axisSize-1 to maintain symmetry with StopForAxis())
-    first = (stride > 0) ? std::numeric_limits<int32_t>::lowest()
-                         : std::numeric_limits<int32_t>::max();
-  }
-
-  const int32_t extent = inputShape[axis];
-
-  // Handle negative indices
-  if (first < 0)
-  {
-    first += extent;
-  }
-
-  return arm_compute::utility::clamp(first, 0, extent - 1);
-}
-
-// Computes the end index of the iteration along `axis`. This is an "end" in the traditional
-// C sense: one past the last element visited. For a 1D array of size 4 iterated completely
-// in forward direction the result is 4, one past the real indices 0, 1, 2 and 3.
-// When bit `axis` of endMask is set, `end` is ignored and the extreme value for the
-// iteration direction is used instead.
-inline int32_t StopForAxis(int32_t endMask, int32_t end, int32_t stride,
-                           const TensorShape &inputShape, int32_t axis)
-{
-  const bool forward = (stride > 0);
-  const bool end_is_masked = (endMask & (1 << axis)) != 0;
-
-  int32_t last = end;
-  if (end_is_masked)
-  {
-    // Forward iteration runs up to the highest value, backward iteration down to the lowest;
-    // both are brought into range by the clamp below
-    last = forward ? std::numeric_limits<int32_t>::max()
-                   : std::numeric_limits<int32_t>::lowest();
-  }
-
-  const int32_t extent = inputShape[axis];
-
-  // Handle negative indices
-  if (last < 0)
-  {
-    last += extent;
-  }
-
-  // Because the end index points one past the last element, the clamping range depends on
-  // the direction: [0, axisSize] going forward, [-1, axisSize - 1] going backward.
-  if (forward)
-  {
-    return arm_compute::utility::clamp(last, 0, extent);
-  }
-  return arm_compute::utility::clamp(last, -1, extent - 1);
-}
-
-// Returns the number of elements visited when stepping from `start` towards `stop`
-// (exclusive) in increments of `stride`, i.e. ceil((stop - start) / stride).
-//
-// start  - first index visited
-// stop   - one-past-the-end index in the direction of iteration
-// stride - step; positive for forward, negative for backward iteration, never zero
-//
-// An empty range (start == stop) yields 0 for every stride. The previous formula returned 1
-// in that case whenever |stride| > 1, because the integer division truncates toward zero.
-// A zero stride is reported as an error instead of dividing by zero.
-inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride)
-{
-  if (stride == 0)
-  {
-    ARM_COMPUTE_ERROR("Stride must not be zero");
-  }
-
-  const int32_t span = stop - start;
-
-  // Integer division truncates toward zero. That already is the ceiling when the exact
-  // quotient is negative; when it is positive and not exact, round up by one.
-  int32_t ret = span / stride;
-  const bool same_direction = ((span < 0) == (stride < 0));
-  if ((span % stride != 0) && same_direction)
-  {
-    ++ret;
-  }
-
-  ARM_COMPUTE_ERROR_ON_MSG(ret < 0, "The dimension must be the natural number");
-  return ret;
-}
-
-// Validates the arguments, stores them, builds the "strided_slice_ex" OpenCL kernel and sets
-// the execution window from the output tensor.
-//
-// input, output                    - source and destination tensors
-// beginData, endData, stridesData  - tensors read in run() (mapped on the host there)
-// beginMask, endMask, shrinkAxisMask - bit masks stored for use in run()
-void CLStridedSliceExKernel::configure(const ICLTensor *input, ICLTensor *output,
-                                       ICLTensor *beginData, ICLTensor *endData,
-                                       ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
-                                       int32_t shrinkAxisMask)
-{
-  ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), beginData->info(),
-                                      endData->info(), stridesData->info(), beginMask, endMask,
-                                      shrinkAxisMask));
-
-  // Tensors
-  _input = input;
-  _output = output;
-  _beginData = beginData;
-  _endData = endData;
-  _stridesData = stridesData;
-
-  // Masks
-  _beginMask = beginMask;
-  _endMask = endMask;
-  _shrinkAxisMask = shrinkAxisMask;
-
-  // Assemble the build options
-  const std::string data_type_opt =
-      "-DELEMENT_DATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type());
-  const std::string depth_out_opt =
-      "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2));
-
-  std::set<std::string> kernel_opts;
-  kernel_opts.emplace(data_type_opt);
-  kernel_opts.emplace(depth_out_opt);
-
-  // Build the kernel
-  _kernel = static_cast<cl::Kernel>(
-      CLKernelLibraryEx::get().create_kernel("strided_slice_ex", kernel_opts));
-
-  // The execution window spans the output tensor
-  Window exec_window = calculate_max_window(*output->info(), Steps());
-  ICLKernel::configure_internal(exec_window);
-}
-
-// Reads the begin/strides tensors on the host, turns them into per-axis start indices and
-// strides (padded to four entries with start 0 / stride 1), passes both as int4 kernel
-// arguments and enqueues the kernel once per 4D slice.
-void CLStridedSliceExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
-  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
-  _beginData->map(queue);
-  _endData->map(queue);
-  _stridesData->map(queue);
-
-  std::vector<int32_t> axis_starts;
-  std::vector<int32_t> axis_strides;
-
-  const size_t num_given_axes = _beginData->info()->tensor_shape().total_size();
-  const TensorShape &input_shape = _input->info()->tensor_shape();
-
-  // One entry per axis described by the begin tensor
-  for (uint32_t axis = 0; axis < num_given_axes; ++axis)
-  {
-    const int32_t begin_value = reinterpret_cast<int32_t *>(_beginData->buffer())[axis];
-    const int32_t stride_value = reinterpret_cast<int32_t *>(_stridesData->buffer())[axis];
-
-    axis_starts.emplace_back(
-        StartForAxis(_beginMask, begin_value, stride_value, input_shape, axis));
-    axis_strides.emplace_back(stride_value);
-  }
-
-  // Remaining axes up to four: start at 0 with stride 1
-  for (uint32_t axis = num_given_axes; axis < 4; axis++)
-  {
-    axis_starts.emplace_back(0);
-    axis_strides.emplace_back(1);
-  }
-  // TODO: Apply shrinkAxisMask
-
-  _beginData->unmap(queue);
-  _stridesData->unmap(queue);
-  _endData->unmap(queue);
-
-  // The two int4 arguments are placed right behind the input and output tensor arguments
-  unsigned int scalar_arg_pos = 2 * num_arguments_per_4D_tensor();
-
-  const cl_int4 starts_arg = {{
-      static_cast<cl_int>(axis_starts[0]), static_cast<cl_int>(axis_starts[1]),
-      static_cast<cl_int>(axis_starts[2]), static_cast<cl_int>(axis_starts[3]),
-  }};
-  _kernel.setArg<cl_int4>(scalar_arg_pos++, starts_arg);
-
-  const cl_int4 strides_arg = {{
-      static_cast<cl_int>(axis_strides[0]), static_cast<cl_int>(axis_strides[1]),
-      static_cast<cl_int>(axis_strides[2]), static_cast<cl_int>(axis_strides[3]),
-  }};
-  _kernel.setArg<cl_int4>(scalar_arg_pos++, strides_arg);
-
-  Window out_slice = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
-  // The input slice is a copy of the output slice with its first four dimensions zeroed
-  const Window::Dimension zero_dim(0, 0, 0);
-  Window in_slice(out_slice);
-  in_slice.set(Window::DimX, zero_dim);
-  in_slice.set(Window::DimY, zero_dim);
-  in_slice.set(Window::DimZ, zero_dim);
-  in_slice.set(3, zero_dim);
-
-  bool has_next_slice = true;
-  while (has_next_slice)
-  {
-    unsigned int tensor_arg_pos = 0;
-    add_4D_tensor_argument(tensor_arg_pos, _input, in_slice);
-    add_4D_tensor_argument(tensor_arg_pos, _output, out_slice);
-    enqueue(queue, *this, out_slice);
-
-    // Advance the input slice first; the output slice is only advanced if that succeeded
-    has_next_slice =
-        window.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice);
-  }
-}
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
deleted file mode 100644
index 073c2f7bb..000000000
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
+++ /dev/null
@@ -1,468 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-namespace arm_compute
-{
-////////////////////////////////////////////////////////////////////////////////
-CLTopKV2Single::CLTopKV2Single() : _input(nullptr), _topk_values(nullptr), _topk_indices(nullptr) {}
-
-void CLTopKV2Single::configure(ICLTensor *input, ICLTensor *topk_values, ICLTensor *topk_indices,
- cl::Buffer *indices, cl::Buffer *temp_stack, int k, int n)
-{
- ARM_COMPUTE_ERROR_ON(input == nullptr && indices == nullptr);
- ARM_COMPUTE_ERROR_ON(topk_values == nullptr && topk_indices == nullptr);
- ARM_COMPUTE_ERROR_ON(n == 0);
-
- _input = input;
- _topk_values = topk_values;
- _topk_indices = topk_indices;
-
- // Set kernel build options
- std::set<std::string> build_opts;
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("topkv2_quicksort", build_opts));
-
- unsigned int idx = 3 * num_arguments_per_1D_tensor();
- _kernel.setArg(idx++, *indices);
- _kernel.setArg(idx++, *temp_stack);
- _kernel.setArg<cl_int>(idx++, k);
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, 1, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLTopKV2Single::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- unsigned int idx = 0;
- add_1D_tensor_argument(idx, _input, window);
- add_1D_tensor_argument(idx, _topk_values, window);
- add_1D_tensor_argument(idx, _topk_indices, window);
-
- enqueue(queue, *this, window);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLTopKV2Init::CLTopKV2Init() : _input(nullptr) {}
-
-void CLTopKV2Init::configure(ICLTensor *input, cl::Buffer *in_key_buf, cl::Buffer *in_ind_buf,
- int n)
-{
- ARM_COMPUTE_ERROR_ON(input == nullptr && in_key_buf == nullptr);
- ARM_COMPUTE_ERROR_ON(in_ind_buf == nullptr);
- ARM_COMPUTE_ERROR_ON(n == 0);
-
- _input = input;
-
- // Set kernel build options
- std::set<std::string> build_opts;
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("topkv2_init", build_opts));
-
- unsigned int idx = num_arguments_per_1D_tensor();
- _kernel.setArg(idx++, *in_key_buf);
- _kernel.setArg(idx++, *in_ind_buf);
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, n, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLTopKV2Init::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- unsigned int idx = 0;
- add_1D_tensor_argument(idx, _input, window);
-
- enqueue(queue, *this, window);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// This kernel makes a histogram of radix for each work item.
-CLRadixSortHistogram::CLRadixSortHistogram() : _pass(0), _in_key_buf(nullptr) {}
-
-void CLRadixSortHistogram::configure(cl::Buffer *hist_buf, int bits, int n)
-{
- ARM_COMPUTE_ERROR_ON(hist_buf == nullptr);
-
- unsigned int radix = 1 << bits;
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits));
- build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix));
- build_opts.emplace("-DPERMUT=1");
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("radixsort_histogram", build_opts));
-
- int loc_histo_size = radix * _ITEMS * sizeof(cl_int);
-
- unsigned int idx = 1;
- _kernel.setArg(idx++, *hist_buf);
-
- idx = 3;
- _kernel.setArg(idx++, loc_histo_size, nullptr);
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, _GROUPS * _ITEMS, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLRadixSortHistogram::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- _kernel.setArg(0, *_in_key_buf);
- _kernel.setArg<cl_int>(2, _pass);
-
- cl::NDRange lws = cl::NDRange(_ITEMS, 1);
-
- enqueue(queue, *this, window, lws);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLRadixSortScanHistogram::CLRadixSortScanHistogram() {}
-
-void CLRadixSortScanHistogram::configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits)
-{
- ARM_COMPUTE_ERROR_ON(hist_buf == nullptr && glob_sum_buf == nullptr);
-
- unsigned int radix = 1 << bits;
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits));
- build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix));
- build_opts.emplace("-DPERMUT=1");
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("radixsort_scanhistograms", build_opts));
-
- int temp_size =
- std::max<uint32_t>(_HISTOSPLIT, _ITEMS * _GROUPS * radix / _HISTOSPLIT) * sizeof(cl_uint);
-
- unsigned int idx = 0;
- _kernel.setArg(idx++, *hist_buf);
- _kernel.setArg(idx++, temp_size, nullptr);
- _kernel.setArg(idx++, *glob_sum_buf);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, radix * _GROUPS * _ITEMS / 2, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLRadixSortScanHistogram::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step();
- cl::NDRange lws = cl::NDRange(gws_x / _HISTOSPLIT, 1);
-
- enqueue(queue, *this, window, lws);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLRadixSortGlobalScanHistogram::CLRadixSortGlobalScanHistogram() {}
-
-void CLRadixSortGlobalScanHistogram::configure(cl::Buffer *glob_sum_buf, cl::Buffer *temp_buf,
- int bits)
-{
- ARM_COMPUTE_ERROR_ON(glob_sum_buf == nullptr && temp_buf == nullptr);
-
- unsigned int radix = 1 << bits;
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits));
- build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix));
- build_opts.emplace("-DPERMUT=1");
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("radixsort_scanhistograms", build_opts));
-
- int temp_size =
- std::max<uint32_t>(_HISTOSPLIT, _ITEMS * _GROUPS * radix / _HISTOSPLIT) * sizeof(cl_uint);
-
- unsigned int idx = 0;
- _kernel.setArg(idx++, *glob_sum_buf);
- _kernel.setArg(idx++, temp_size, nullptr);
- _kernel.setArg(idx++, *temp_buf);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, _HISTOSPLIT / 2, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLRadixSortGlobalScanHistogram::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step();
- cl::NDRange lws = cl::NDRange(gws_x, 1);
-
- enqueue(queue, *this, window, lws);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLRadixSortPasteHistogram::CLRadixSortPasteHistogram() {}
-
-void CLRadixSortPasteHistogram::configure(cl::Buffer *hist_buf, cl::Buffer *glob_sum_buf, int bits)
-{
- ARM_COMPUTE_ERROR_ON(hist_buf == nullptr && glob_sum_buf == nullptr);
-
- unsigned int radix = 1 << bits;
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits));
- build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix));
- build_opts.emplace("-DPERMUT=1");
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("radixsort_pastehistograms", build_opts));
-
- unsigned int idx = 0;
- _kernel.setArg(idx++, *hist_buf);
- _kernel.setArg(idx++, *glob_sum_buf);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, radix * _GROUPS * _ITEMS / 2, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLRadixSortPasteHistogram::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step();
- cl::NDRange lws = cl::NDRange(gws_x / _HISTOSPLIT, 1);
-
- enqueue(queue, *this, window, lws);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLRadixSortReorder::CLRadixSortReorder()
- : _pass(0), _in_key_buf(nullptr), _out_key_buf(nullptr), _in_ind_buf(nullptr),
- _out_ind_buf(nullptr)
-{
-}
-
-void CLRadixSortReorder::configure(cl::Buffer *hist_buf, int bits, int n)
-{
- ARM_COMPUTE_ERROR_ON(hist_buf == nullptr);
- ARM_COMPUTE_ERROR_ON(n == 0);
-
- unsigned int radix = 1 << bits;
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-D_BITS=" + support::cpp11::to_string(bits));
- build_opts.emplace("-D_RADIX=" + support::cpp11::to_string(radix));
- build_opts.emplace("-DPERMUT=1");
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("radixsort_reorder", build_opts));
-
- unsigned int idx = 2;
- _kernel.setArg(idx++, *hist_buf);
-
- idx = 6;
- _kernel.setArg(idx++, sizeof(uint) * radix * _ITEMS, nullptr);
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, _GROUPS * _ITEMS, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLRadixSortReorder::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step();
- unsigned int lx = std::max(1U, (gws_x / _HISTOSPLIT));
- cl::NDRange lws = (lx < gws_x) ? cl::NDRange(lx, 1) : cl::NDRange(1, 1);
-
- _kernel.setArg(0, *_in_key_buf);
- _kernel.setArg(1, *_out_key_buf);
- _kernel.setArg<cl_int>(3, _pass);
- _kernel.setArg(4, *_in_ind_buf);
- _kernel.setArg(5, *_out_ind_buf);
-
- enqueue(queue, *this, window, lws);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLTopKV2FindFirstNegative::CLTopKV2FindFirstNegative() : _out_key_buf(nullptr) {}
-
-void CLTopKV2FindFirstNegative::configure(cl::Buffer *first_negative_idx_buf, int n)
-{
- ARM_COMPUTE_ERROR_ON(first_negative_idx_buf == nullptr);
- ARM_COMPUTE_ERROR_ON(n == 0);
-
- // Set kernel build options
- std::set<std::string> build_opts;
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("topkv2_find_first_negative", build_opts));
-
- unsigned int idx = 1;
- _kernel.setArg(idx++, *first_negative_idx_buf);
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, n, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLTopKV2FindFirstNegative::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- unsigned int idx = 0;
- _kernel.setArg(idx++, *_out_key_buf);
-
- enqueue(queue, *this, window);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLTopKV2ReorderNegatives::CLTopKV2ReorderNegatives()
- : _in_key_buf(nullptr), _out_key_buf(nullptr), _in_ind_buf(nullptr), _out_ind_buf(nullptr)
-{
-}
-
-void CLTopKV2ReorderNegatives::configure(cl::Buffer *first_negative_idx_buf, int n)
-{
- ARM_COMPUTE_ERROR_ON(first_negative_idx_buf == nullptr);
- ARM_COMPUTE_ERROR_ON(n == 0);
-
- // Set kernel build options
- std::set<std::string> build_opts;
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("topkv2_reorder_negatives", build_opts));
-
- unsigned int idx = 4;
- _kernel.setArg(idx++, *first_negative_idx_buf);
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, n, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLTopKV2ReorderNegatives::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- unsigned int idx = 0;
- _kernel.setArg(idx++, *_in_key_buf);
- _kernel.setArg(idx++, *_out_key_buf);
- _kernel.setArg(idx++, *_in_ind_buf);
- _kernel.setArg(idx++, *_out_ind_buf);
-
- enqueue(queue, *this, window);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-CLTopKV2Store::CLTopKV2Store()
- : _values(nullptr), _indices(nullptr), _out_key_buf(nullptr), _out_ind_buf(nullptr)
-{
-}
-
-void CLTopKV2Store::configure(ICLTensor *values, ICLTensor *indices, int k, int n)
-{
- ARM_COMPUTE_ERROR_ON(values == nullptr && indices == nullptr);
- ARM_COMPUTE_ERROR_ON(k == 0);
- ARM_COMPUTE_ERROR_ON(k > n);
-
- _values = values;
- _indices = indices;
-
- // Set kernel build options
- std::set<std::string> build_opts;
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("topkv2_store", build_opts));
-
- unsigned int idx = 2 * num_arguments_per_1D_tensor() + 2;
- _kernel.setArg<cl_int>(idx++, n);
-
- // Configure kernel window
- Window win;
- win.set(0, Window::Dimension(0, k, 1));
- ICLKernel::configure_internal(win);
-}
-
-void CLTopKV2Store::setOutputBuffers(cl::Buffer *out_key_buf, cl::Buffer *out_ind_buf)
-{
- _out_key_buf = out_key_buf;
- _out_ind_buf = out_ind_buf;
-}
-
-void CLTopKV2Store::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- unsigned int idx = 0;
- add_1D_tensor_argument(idx, _values, window);
- add_1D_tensor_argument(idx, _indices, window);
- _kernel.setArg(idx++, *_out_key_buf);
- _kernel.setArg(idx++, *_out_ind_buf);
-
- enqueue(queue, *this, window);
-}
-
-} // namespace arm_compute
diff --git a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp
deleted file mode 100644
index 3b5782c25..000000000
--- a/libs/ARMComputeEx/src/core/NEON/kernels/NENormalizationLayerExKernel.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/NEON/kernels/NENormalizationLayerExKernel.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEMath.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *input_squared,
- const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_squared, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_squared);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, input_squared);
-
- // Checks performed when output is configured
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input,
- ITensorInfo *input_squared,
- ITensorInfo *output,
- const NormalizationLayerInfo &norm_info)
-{
- unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
- const unsigned int num_elems_read_per_iteration =
- num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
- const unsigned int num_rows =
- (norm_info.type() == NormType::IN_MAP_2D) ? norm_info.norm_size() : 1;
- const unsigned int border_width =
- (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
- BorderSize border_size = BorderSize(0, border_width);
- bool window_changed = false;
-
- // Configure window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
- AccessWindowRectangle input_access(input, -border_size.left, 0, num_elems_read_per_iteration,
- num_rows);
- AccessWindowRectangle input_squared_access(input_squared, -border_size.left, 0,
- num_elems_read_per_iteration, num_rows);
-
- if (output->total_size() != 0)
- {
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed =
- update_window_and_padding(win, input_access, input_squared_access, output_access);
- output_access.set_valid_region(win, input->valid_region());
- }
- else
- {
- window_changed = update_window_and_padding(win, input_access, input_squared_access);
- }
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-NENormalizationLayerExKernel::NENormalizationLayerExKernel()
- : _func(nullptr), _input(nullptr), _input_squared(nullptr), _output(nullptr),
- _norm_info(NormType::IN_MAP_1D), _border_size()
-{
-}
-
-BorderSize NENormalizationLayerExKernel::border_size() const { return _border_size; }
-
-void NENormalizationLayerExKernel::configure(const ITensor *input, const ITensor *input_squared,
- ITensor *output, NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_squared, output);
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info());
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input->info(), input_squared->info(), output->info(), norm_info));
-
- const unsigned int border_width =
- (norm_info.is_cross_map()) ? 0 : std::min<unsigned int>(norm_info.norm_size() / 2, 3U);
-
- _input = input;
- _input_squared = input_squared;
- _output = output;
- _norm_info = norm_info;
- _border_size = BorderSize(0, border_width);
-
- switch (_input->info()->data_type())
- {
- case DataType::F32:
- {
- switch (norm_info.type())
- {
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 0, true>;
- break;
- case NormType::CROSS_MAP:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F32, 2, false>;
- break;
- default:
- break;
- }
- break;
- }
- case DataType::F16:
- {
- switch (norm_info.type())
- {
- case NormType::IN_MAP_1D:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, false>;
- break;
- case NormType::IN_MAP_2D:
- // Normalize over X and Y
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 0, true>;
- break;
- case NormType::CROSS_MAP:
- _func = &NENormalizationLayerExKernel::normalize_float<DataType::F16, 2, false>;
- break;
- default:
- break;
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("NOT SUPPORTED!");
- }
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), input_squared->info(),
- output->info(), norm_info);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- INEKernel::configure(win_config.second);
-}
-
-template <DataType dt, unsigned int dim, bool do_2D_norm>
-void NENormalizationLayerExKernel::normalize_float(const Window &window)
-{
- Iterator input(_input, window);
- Iterator input_squared(_input_squared, window);
- Iterator output(_output, window);
-
- const int dim_y = 1;
- const int radius = _norm_info.norm_size();
- const int total_size = _input->info()->dimension(dim) - 1;
- const int input_squared_stride = _input_squared->info()->strides_in_bytes()[dim];
- // We account padding across X only and we iterate over rows
- const int min_left = (dim == 2) ? 0 : -static_cast<int>(border_size().left);
- const int max_right = (dim == 2) ? total_size : total_size + border_size().left;
- const int min_top = 0;
- const int max_bottom = _input->info()->dimension(dim_y) - 1;
-
- if (dt == DataType::F32)
- {
- const float32x4_t coeff_vec = vdupq_n_f32(_norm_info.scale_coeff());
- const float32x4_t beta_vec = vdupq_n_f32(_norm_info.beta());
- const float32x4_t kappa_vec = vdupq_n_f32(_norm_info.kappa());
-
- execute_window_loop(
- window,
- [&](const Coordinates &id) {
- // Get range to normalize
- const int current_row = do_2D_norm ? id[dim_y] : 0;
- const int current_slice = id[dim];
- const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0;
- const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
- const int first_slice = std::max(current_slice - radius, min_left);
- const int last_slice = std::min(current_slice + radius, max_right);
-
- // Accumulate 2D In-Map values
- float32x4_t accu = vdupq_n_f32(0.f);
- for (int j = first_row; j <= last_row; j++)
- {
- // Compute row displacement
- const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y];
- const uint8_t *const input_squared_ptr =
- input_squared.ptr() + row - (current_slice * input_squared_stride);
- for (int i = first_slice; i <= last_slice; ++i)
- {
- accu = vaddq_f32(accu, vld1q_f32(reinterpret_cast<const float *>(
- input_squared_ptr + i * input_squared_stride)));
- }
- }
-
- // Normalize
- const float32x4_t normalized = vpowq_f32(vmlaq_f32(kappa_vec, coeff_vec, accu), beta_vec);
- const float32x4_t normalized_pixel = vmulq_f32(
- vld1q_f32(reinterpret_cast<const float *>(input.ptr())), vinvq_f32(normalized));
- vst1q_f32(reinterpret_cast<float *>(output.ptr()), normalized_pixel);
- },
- input, input_squared, output);
- }
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- else if (dt == DataType::F16)
- {
- const float16x8_t coeff_vec = vdupq_n_f16(_norm_info.scale_coeff());
- const float16x8_t beta_vec_f16 = vdupq_n_f16(_norm_info.beta());
- const float16x8_t kappa_vec = vdupq_n_f16(_norm_info.kappa());
-
- execute_window_loop(
- window,
- [&](const Coordinates &id) {
- // Get range to normalize
- const int current_row = do_2D_norm ? id[dim_y] : 0;
- const int current_slice = id[dim];
- const int first_row = do_2D_norm ? std::max(current_row - radius, min_top) : 0;
- const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0;
- const int first_slice = std::max(current_slice - radius, min_left);
- const int last_slice = std::min(current_slice + radius, max_right);
-
- // Accumulate 2D In-Map values
- float16x8_t accu = vdupq_n_f16(0.f);
- for (int j = first_row; j <= last_row; j++)
- {
- // Compute row displacement
- const int row = (j - current_row) * _input_squared->info()->strides_in_bytes()[dim_y];
- const uint8_t *const input_squared_ptr =
- input_squared.ptr() + row - (current_slice * input_squared_stride);
- for (int i = first_slice; i <= last_slice; ++i)
- {
- accu = vaddq_f16(accu, vld1q_f16(reinterpret_cast<const float16_t *>(
- input_squared_ptr + i * input_squared_stride)));
- }
- }
-
- const float16x8_t norm_f16 =
- vpowq_f16(vaddq_f16(kappa_vec, vmulq_f16(coeff_vec, accu)), beta_vec_f16);
- const float16x8_t normalized_pixel = vmulq_f16(
- vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr())), vinvq_f16(norm_f16));
- vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), normalized_pixel);
- },
- input, input_squared, output);
- }
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- else
- {
- ARM_COMPUTE_ERROR("Not supported");
- }
-}
-
-Status NENormalizationLayerExKernel::validate(const ITensorInfo *input,
- const ITensorInfo *input_squared,
- const ITensorInfo *output,
- const NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, input_squared, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
- input_squared->clone().get(),
- output->clone().get(), norm_info)
- .first);
-
- return Status{};
-}
-
-void NENormalizationLayerExKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
- // Run function
- (this->*_func)(window);
-}
diff --git a/libs/ARMComputeEx/src/core/UtilsEx.cpp b/libs/ARMComputeEx/src/core/UtilsEx.cpp
deleted file mode 100644
index b63093bbb..000000000
--- a/libs/ARMComputeEx/src/core/UtilsEx.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/UtilsEx.h"
-
-#include <cstdint>
-#include <fstream>
-#include <map>
-#include <string>
-
-using namespace arm_compute;
-
-const std::string &
-arm_compute::string_from_activation_func_ex(ActivationLayerInfoEx::ActivationFunction act)
-{
- static std::map<ActivationLayerInfoEx::ActivationFunction, const std::string> act_map = {
- {ActivationLayerInfoEx::ActivationFunction::RSQRT, "RSQRT"},
- };
-
- return act_map[act];
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp
deleted file mode 100644
index 1e52fc429..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLActivationLayerEx.h"
-
-#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h"
-
-using namespace arm_compute;
-
-void CLActivationLayerEx::configure(ICLTensor *input, ICLTensor *output,
- ActivationLayerInfoEx act_info)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerExKernel>();
- k->configure(input, output, act_info);
- _kernel = std::move(k);
-}
-
-Status CLActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ActivationLayerInfoEx &act_info)
-{
- return CLActivationLayerExKernel::validate(input, output, act_info);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp
deleted file mode 100644
index dff743e89..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLArgMinMax.h"
-
-#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-namespace arm_compute
-{
-
-CLArgMinMax::CLArgMinMax()
- : _input(nullptr), _output(nullptr), _argminmax_axis(), _interm_tensors(), _argminmax_kernels(),
- _num_of_kernels()
-{
-}
-
-void CLArgMinMax::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis,
- ArgOperation op)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op));
- _input = input;
- _output = output;
- _argminmax_axis = axis;
- _arg_op = op;
- // NOTE The argminmax_axis must have no duplication.
- _num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = _num_of_kernels - 1;
-
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _argminmax_kernels =
- arm_compute::support::cpp14::make_unique<CLArgMinMaxKernel[]>(_num_of_kernels);
-
- TensorShape shape{input->info()->tensor_shape()};
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(_argminmax_axis[i], 1);
- _interm_tensors[i].allocator()->init(
- TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
- _interm_tensors[i].allocator()->allocate();
- }
-
- // Set a vector that is ordered ICLTensors sequentially.
- std::vector<ICLTensor *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(_interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Apply ArgMinMax on all kernels
- for (size_t i = 0; i < _num_of_kernels; i++)
- {
- _argminmax_kernels[i].configure(tensors[i], tensors[i + 1], _argminmax_axis[i], op);
- }
-}
-
-Status CLArgMinMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis,
- const ITensorInfo *output, ArgOperation op)
-{
- const size_t num_of_kernels = argminmax_axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- // Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
-
- // Create intermediate tensor info
- TensorShape shape{input->tensor_shape()};
-
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(argminmax_axis[i], 1);
- interm_tensors[i].set_data_type(input->data_type());
- interm_tensors[i].set_tensor_shape(shape);
- interm_tensors[i].set_num_channels(input->num_channels());
- }
-
- // Set a vector that is ordered ITensorInfo sequentially.
- std::vector<const ITensorInfo *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Validate argminmax only on all kernels
- for (size_t i = 0; i < num_of_kernels; i++)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgMinMaxKernel::validate(tensors[i], tensors[i + 1], argminmax_axis[i], op));
- }
-
- return Status{};
-}
-
-void CLArgMinMax::run()
-{
- for (size_t i = 0; i < _num_of_kernels; ++i)
- {
- CLScheduler::get().enqueue(_argminmax_kernels[i]);
- }
-}
-
-} // namespace arm_compute
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp
deleted file mode 100644
index 3f403c80a..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h"
-
-using namespace arm_compute;
-
-void CLArithmeticSubtractionEx::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- ConvertPolicy policy)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLArithmeticSubtractionExKernel>();
- k->configure(input1, input2, output, policy);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
-
-Status CLArithmeticSubtractionEx::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, ConvertPolicy policy)
-{
- return CLArithmeticSubtractionExKernel::validate(input1, input2, output, policy);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp
deleted file mode 100644
index 26e3798cc..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h"
-
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h"
-
-using namespace arm_compute;
-
-void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
deleted file mode 100644
index 7c5fe5eda..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h"
-
-#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- BinaryLogicalOperation op)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
- k->configure(input1, input2, output, op);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
deleted file mode 100644
index 8e106737c..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLCast.h"
-
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-using namespace arm_compute;
-
-void CLCast::configure(ICLTensor *input, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp
deleted file mode 100644
index f6a745a25..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLComparisonOp.h"
-
-#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLComparisonOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- const ComparisonOperation &op)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLComparisonOpKernel>();
- k->configure(input1, input2, output, op);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
deleted file mode 100644
index c2e4ca9ff..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-using namespace arm_compute;
-
-void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
deleted file mode 100644
index 2781784ca..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
-
-#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
-
-using namespace arm_compute;
-
-void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
- const ICLTensor *lookups)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>();
- k->configure(input, output, lookups);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp
deleted file mode 100644
index 411fa8700..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLExp.h"
-
-#include "arm_compute/core/CL/kernels/CLExpKernel.h"
-
-using namespace arm_compute;
-
-void CLExp::configure(const ICLTensor *input, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLExpKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
deleted file mode 100644
index fb056fe45..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLGather.h"
-
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-
-using namespace arm_compute;
-
-void CLGather::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLGatherKernel>();
- k->configure(input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status CLGather::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output)
-{
- return CLGatherKernel::validate(input1, input2, output);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
deleted file mode 100644
index 7180e9356..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h"
-
-#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
-
-using namespace arm_compute;
-
-void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
- const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>();
- k->configure(lookups, keys, input, output, hits);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
deleted file mode 100644
index be35ea732..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLNeg.h"
-
-#include "arm_compute/core/CL/kernels/CLNegKernel.h"
-
-using namespace arm_compute;
-
-void CLNeg::configure(ICLTensor *input, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
deleted file mode 100644
index 276c4557a..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-using namespace arm_compute;
-
-CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {}
-
-void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
- const NormalizationLayerInfo &norm_info)
-{
- ARM_COMPUTE_ERROR_ON(input == nullptr);
-
- // Configure normalization kernel
- _norm_kernel.configure(input, output, norm_info);
-
- // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
- _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
-}
-
-Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const NormalizationLayerInfo &norm_info)
-{
- return CLNormalizationLayerExKernel::validate(input, output, norm_info);
-}
-
-void CLNormalizationLayerEx::run()
-{
- // Run border handler
- CLScheduler::get().enqueue(_border_handler, false);
-
- // Run normalization kernel
- CLScheduler::get().enqueue(_norm_kernel);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
deleted file mode 100644
index 38adedd10..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLPReLU.h"
-
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>();
- k->configure(input, alpha, output);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
deleted file mode 100644
index 5265b6c34..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
-
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-using namespace arm_compute;
-
-void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>();
- k->configure(input, output, pad_size);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp
deleted file mode 100644
index fb363270d..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLPermuteEx.h"
-
-#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h"
-
-using namespace arm_compute;
-
-void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output,
- const PermutationVector &perm)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>();
- k->configure(input, output, perm);
- _kernel = std::move(k);
-}
-
-Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm));
- return Status{};
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
deleted file mode 100644
index dc0baa8dd..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLPixelWiseDivision.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h"
-
-using namespace arm_compute;
-
-void CLPixelWiseDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- float scale, ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseDivisionKernel>();
- k->configure(input1, input2, output, scale, overflow_policy, rounding_policy);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
-
-Status CLPixelWiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy)
-{
- return CLPixelWiseDivisionKernel::validate(input1, input2, output, scale, overflow_policy,
- rounding_policy);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
deleted file mode 100644
index 2b8d82706..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLReduceOperation.h"
-
-#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-using namespace arm_compute;
-
-CLReduceOperation::CLReduceOperation()
- : _input(nullptr), _output(nullptr), _axis(), _interm_tensors(), _reduce_kernels()
-{
-}
-
-Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output,
- const std::set<uint32_t> &axis, const ReduceOperation &op)
-{
- const size_t num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- // Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
-
- // Create intermediate tensor info
- TensorShape shape{input->tensor_shape()};
-
- auto it = axis.begin();
- for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it)
- {
- shape.set(*it, 1);
- interm_tensors[i].set_data_type(input->data_type());
- interm_tensors[i].set_tensor_shape(shape);
- interm_tensors[i].set_num_channels(input->num_channels());
- }
-
- // Set a vector that is ordered ITensorInfo sequentially.
- std::vector<const ITensorInfo *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; ++i)
- {
- tensors.emplace_back(interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Validate ReduceOperation only on all kernels
- it = axis.begin();
- for (size_t i = 0; i < num_of_kernels; ++i, ++it)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
- }
-
- return Status{};
-}
-
-void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
- const std::set<uint32_t> &axis, ReduceOperation op)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op));
-
- _axis = axis;
-
- _input = input;
- _output = output;
-
- // NOTE The axis must have no duplication.
- const size_t num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels =
- arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
-
- TensorShape shape{input->info()->tensor_shape()};
- auto it = axis.begin();
- for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it)
- {
- shape.set(*it, 1);
- _interm_tensors[i].allocator()->init(
- TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
- _interm_tensors[i].allocator()->allocate();
- }
-
- // Set a vector that is ordered ICLTensors sequentially.
- std::vector<ICLTensor *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; ++i)
- {
- tensors.emplace_back(_interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Apply ReduceOperation on all kernels
- it = axis.begin();
- for (size_t i = 0; i < num_of_kernels; ++i, ++it)
- {
- _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op);
- }
-}
-
-void CLReduceOperation::run()
-{
- const size_t num_of_kernels = _axis.size();
- for (size_t i = 0; i < num_of_kernels; ++i)
- {
- CLScheduler::get().enqueue(_reduce_kernels[i]);
- }
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
deleted file mode 100644
index c03826891..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h"
-
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
-
-using namespace arm_compute;
-
-void CLSpaceToBatchND::configure(const ICLTensor *input, const ICLTensor *block_size,
- const ICLTensor *padding_size, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLSpaceToBatchNDKernel>();
- k->configure(input, block_size, padding_size, output);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
deleted file mode 100644
index 0f455f96f..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
-
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-using namespace arm_compute;
-
-void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp
deleted file mode 100644
index dc6e4af44..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLSquaredDifference.h"
-
-#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLSquaredDifference::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLSquaredDifferenceKernel>();
- k->configure(input1, input2, output);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp
deleted file mode 100644
index be7353493..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h"
-
-#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h"
-
-using namespace arm_compute;
-
-void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask,
- int32_t endMask, int32_t shrinkAxisMask)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>();
- k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask);
- _kernel = std::move(k);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
deleted file mode 100644
index 19177497c..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLTopKV2.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-
-#include "../../topk_v2.h"
-
-namespace arm_compute
-{
-
-CLTopKV2::CLTopKV2()
- : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0),
- _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(),
- _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(),
- _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr),
- _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr), _qs_kernel(),
- _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(),
- _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(),
- _reorder_negatives_kernel(), _store_kernel()
-{
-}
-
-void CLTopKV2::configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices,
- int total_bits, int bits)
-{
- _total_bits = total_bits;
- _bits = bits;
- _n = input->info()->tensor_shape()[0];
-
- // _total_bits should be divided by _bits.
- ARM_COMPUTE_ERROR_ON((_total_bits % _bits) != 0);
-
- _k = k;
- _radix = 1 << bits;
-
- _input = input;
- _values = values;
- _indices = indices;
-
- std::string topk_env;
-
- char *env = getenv("ACL_TOPKV2");
- if (env)
- topk_env = env;
-
- if (topk_env == "GPU_SINGLE")
- {
- _qs_idx_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
- _qs_temp_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
-
- _qs_kernel.configure(input, values, indices, &_qs_idx_buf, &_qs_temp_buf, k, _n);
- }
- else if (topk_env == "GPU")
- {
- // n should be divided by (_GROUPS * _ITEMS)
- ARM_COMPUTE_ERROR_ON((_n % (_GROUPS * _ITEMS)) != 0);
-
- _hist_buf_size = _radix * _GROUPS * _ITEMS;
- _glob_sum_buf_size = _HISTOSPLIT;
-
- _hist_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- sizeof(cl_int) * _hist_buf_size);
- _glob_sum_buf =
- cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- sizeof(cl_int) * _glob_sum_buf_size);
- _temp_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- sizeof(cl_int) * _glob_sum_buf_size);
- _first_negative_idx_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int));
- _in_key_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n);
- _out_key_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n);
- _in_ind_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
- _out_ind_buf = cl::Buffer(CLScheduler::get().context(),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n);
-
- _p_in_key_buf = &_in_key_buf;
- _p_out_key_buf = &_out_key_buf;
- _p_in_ind_buf = &_in_ind_buf;
- _p_out_ind_buf = &_out_ind_buf;
-
- _init_kernel.configure(input, _p_in_key_buf, _p_in_ind_buf, _n);
- _hist_kernel.configure(&_hist_buf, bits, _n);
- _scan_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits);
- _glob_scan_hist_kernel.configure(&_glob_sum_buf, &_temp_buf, bits);
- _paste_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits);
- _reorder_kernel.configure(&_hist_buf, bits, _n);
- _find_first_negative_kernel.configure(&_first_negative_idx_buf, _n);
- _reorder_negatives_kernel.configure(&_first_negative_idx_buf, _n);
- _store_kernel.configure(values, indices, k, _n);
- }
- else
- {
- // DO NOTHING for CPU.
- }
-}
-
-void CLTopKV2::run()
-{
- std::string topk_env;
-
- char *env = getenv("ACL_TOPKV2");
- if (env)
- topk_env = env;
-
- if (topk_env == "GPU_SINGLE")
- {
- run_on_gpu_single_quicksort();
- }
- else if (topk_env == "GPU")
- {
- run_on_gpu();
- }
- else
- {
- run_on_cpu();
- }
-}
-
-void CLTopKV2::run_on_gpu_single_quicksort()
-{
- // This is a single threaded quick sort implementation.
- CLScheduler::get().enqueue(_qs_kernel, false);
-
- arm_compute::CLScheduler::get().sync();
-}
-
-void CLTopKV2::run_on_gpu()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- // 1. CLTopKV2Init set key buffer and index buffer.
- // - Key buffer is set as the same value of the layer's input
- // - Values in the index buffer are set as their indices.
- CLScheduler::get().enqueue(_init_kernel, false);
-
- int n_passes = _total_bits / _bits;
-
- // 2. Repeat (total_bits/bits) times.
- // - total_bits is the number of bits of the data type (e.g., 32 for float)
- // - bits defines number of buckets (e.g. 16 buckets where bit is 4)
- for (int pass = 0; pass < n_passes; ++pass)
- {
- arm_compute::CLScheduler::get().sync();
-
- // 2.1. Calculate histogram with _GROUPS * _ITEMS threads
- _hist_kernel.setPass(pass, _p_in_key_buf);
- CLScheduler::get().enqueue(_hist_kernel, false);
-
- // 2.2. Calculate prefix sum locally with multiple threads
- CLScheduler::get().enqueue(_scan_hist_kernel, false);
- // 2.3. Calculate prefix sum within a work group
- CLScheduler::get().enqueue(_glob_scan_hist_kernel, false);
- // 2.4. Calculate global prefix sum
- CLScheduler::get().enqueue(_paste_hist_kernel, false);
-
- // 2.5. Reorder keys and indices based on the global prefix sum
- _reorder_kernel.setPass(pass, _p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, _p_out_ind_buf);
- CLScheduler::get().enqueue(_reorder_kernel, false);
-
- cl::Buffer *tmp;
- // swap key buffers
- tmp = _p_in_key_buf;
- _p_in_key_buf = _p_out_key_buf;
- _p_out_key_buf = tmp;
-
- // swap index buffers
- tmp = _p_in_ind_buf;
- _p_in_ind_buf = _p_out_ind_buf;
- _p_out_ind_buf = tmp;
- }
-
- // 3. Get the first negative index
- // Because we swap in_buf and out_buf at the end of the above for loop,
- // the output buffers are in bufs.
- _find_first_negative_kernel.setOutputBuffer(_p_in_key_buf);
- CLScheduler::get().enqueue(_find_first_negative_kernel, false);
-
- // 4. Correct odering of negatives
- // - Since radix sort does not consider negatives, negatives are considered as bigger values
- // than positives.
- // reordered data will be stored in _p_out_key_buf and _p_out_ind_buf
- _reorder_negatives_kernel.setBuffers(_p_in_key_buf, _p_out_key_buf, _p_in_ind_buf,
- _p_out_ind_buf);
- CLScheduler::get().enqueue(_reorder_negatives_kernel, false);
-
- // 5. Extract top k values from sorted keys and indices.
- _store_kernel.setOutputBuffers(_p_out_key_buf, _p_out_ind_buf);
- CLScheduler::get().enqueue(_store_kernel, false);
-
- arm_compute::CLScheduler::get().sync();
-
-#if 0
- // below code is left for debugging.
- int first_neg;
- q.enqueueReadBuffer(_first_negative_idx_buf, CL_TRUE, 0, sizeof(cl_int), &first_neg);
- std::cout << "first neg = " << first_neg << std::endl;
-
- float in_key[_n];
- q.enqueueReadBuffer(*_p_in_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, in_key);
- for(uint32_t i = 0 ; i < _n; ++i) {
- std::cout << "in_key[" << i << "] = " << in_key[i] << std::endl;
- }
-
- float out_key[_n];
- q.enqueueReadBuffer(*_p_out_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, out_key);
- for(uint32_t i = 0 ; i < _n; ++i) {
- std::cout << "out_key[" << i << "] = " << out_key[i] << std::endl;
- }
-
- int in_ind[_n];
- q.enqueueReadBuffer(*_p_in_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, in_ind);
- for(uint32_t i = 0 ; i < _n; ++i) {
- std::cout << "in_ind[" << i << "] = " << in_ind[i] << std::endl;
- }
-
- int out_ind[_n];
- q.enqueueReadBuffer(*_p_out_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, out_ind);
- for(uint32_t i = 0 ; i < _n; ++i) {
- std::cout << "out_ind[" << i << "] = " << out_ind[i] << std::endl;
- }
-
- int hist_buf[_hist_buf_size];
- q.enqueueReadBuffer(_hist_buf, CL_TRUE, 0, sizeof(cl_int)*_hist_buf_size, hist_buf);
- for(uint32_t i = 0 ; i < _hist_buf_size; ++i) {
- std::cout << "hist_buf[" << i << "] = " << hist_buf[i] << std::endl;
- }
-
- int glob_sum_buf[_glob_sum_buf_size];
- q.enqueueReadBuffer(_glob_sum_buf, CL_TRUE, 0, sizeof(cl_int)*_glob_sum_buf_size, glob_sum_buf);
- for(uint32_t i = 0 ; i < _glob_sum_buf_size; ++i) {
- std::cout << "glob_sum_buf[" << i << "] = " << glob_sum_buf[i] << std::endl;
- }
-
-#endif
-}
-
-void CLTopKV2::run_on_cpu()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
- // const Window& w = _topkv2_kernel.window();
-
- _input->map(q);
- _values->map(q);
- _indices->map(q);
-
- // int row_size = (w[0].end() - w[0].start()) / w[0].step();
- int row_size = _input->info()->tensor_shape()[0];
- int rank = _input->info()->num_dimensions();
-
- if (rank > 2)
- throw std::runtime_error("Not supported type.");
-
- int row_num = (rank == 2 ? _input->info()->tensor_shape()[1] : 1);
-
- if (_input->info()->data_type() == DataType::F32)
- {
- nnfw::rt::optimized_ops::TopK<float>(row_size, row_num, (float *)_input->buffer(), _k,
- (int32 *)_indices->buffer(), (float *)_values->buffer());
- }
- else if (_input->info()->data_type() == DataType::S32)
- {
- nnfw::rt::optimized_ops::TopK<int32_t>(row_size, row_num, (int32_t *)_input->buffer(), _k,
- (int32 *)_indices->buffer(),
- (int32_t *)_values->buffer());
- }
- else if (_input->info()->data_type() == DataType::QASYMM8)
- {
- nnfw::rt::optimized_ops::TopK<uint8_t>(row_size, row_num, (uint8_t *)_input->buffer(), _k,
- (int32 *)_indices->buffer(),
- (uint8_t *)_values->buffer());
- }
- else
- {
- throw std::runtime_error("Not supported type.");
- }
-
- _input->unmap(q);
- _values->unmap(q);
- _indices->unmap(q);
-}
-} // namespace arm_compute
diff --git a/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp
deleted file mode 100644
index 988e92715..000000000
--- a/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-using namespace arm_compute;
-
-NENormalizationLayerEx::NENormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_kernel(),
- _border_handler(), _input_squared()
-{
-}
-
-void NENormalizationLayerEx::configure(const ITensor *input, ITensor *output,
- const NormalizationLayerInfo &norm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(),
- input->info()->quantization_info());
- _input_squared.allocator()->init(tensor_info);
-
- // Manage intermediate buffers
- _memory_group.manage(&_input_squared);
-
- // Configure kernels
- _norm_kernel.configure(input, &_input_squared, output, norm_info);
- _multiply_kernel.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE,
- RoundingPolicy::TO_ZERO);
- _border_handler.configure(&_input_squared, _norm_kernel.border_size(), BorderMode::CONSTANT,
- PixelValue(0.0f));
-
- // Allocate the tensor once the configure methods have been called
- _input_squared.allocator()->allocate();
-}
-
-Status NENormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const NormalizationLayerInfo &norm_info)
-{
- // Perform validation step
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_RETURN_ON_ERROR(
- NENormalizationLayerExKernel::validate(input, input, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate(
- input, input, output, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
-
- return Status{};
-}
-
-void NENormalizationLayerEx::run()
-{
- _memory_group.acquire();
-
- NEScheduler::get().schedule(&_multiply_kernel, Window::DimY);
- NEScheduler::get().schedule(&_border_handler, Window::DimY);
- NEScheduler::get().schedule(&_norm_kernel, Window::DimY);
-
- _memory_group.release();
-}
diff --git a/libs/ARMComputeEx/src/runtime/topk_v2.h b/libs/ARMComputeEx/src/runtime/topk_v2.h
deleted file mode 100644
index f94effea1..000000000
--- a/libs/ARMComputeEx/src/runtime/topk_v2.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file topk_v2.h
- * @brief This file contains TopK method and TopContainer class for TopK operation
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__
-#define __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__
-
-typedef int32_t int32;
-
-namespace nnfw
-{
-namespace rt
-{
-namespace optimized_ops
-{
-/**
- * @brief class to define TopK operation
- * @note The follwing codes are impemented and modified while referring to TFLite topk_v2.cc file.
- * TopK_v2 of NN Runtime supports TENSOR_FLOAT32, TENSOR_QUANT8_ASYMM, TENSOR_INT32 other than
- * TFLite.
- * (TFLite additionaly supports kTfLiteInt64.)
- *
- * The class that collects top indexes of k values. Based on template
- * tensorflow::gtl::TopN<> but, for optimization,
- * it re-uses the same container.
- */
-template <typename T> class TopContainer
-{
-public:
- /**
- * @brief Prevent default constructor of of this class
- */
- TopContainer() = delete;
- /**
- * @brief Constructor with params
- * @param [in] row_size Size of row in data
- * @param [in] k The top k predictions
- */
- TopContainer(int32 k, int32 row_size) : k_(k), container_(), values_(nullptr)
- {
- container_.reserve(std::min(k, row_size) + 1);
- }
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * @param [in] topContainer To copy
- */
- TopContainer(const TopContainer &) = delete;
- /*
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * @param [in] topContainer To copy
- * @return Reference of TopContainer
- */
- TopContainer &operator=(const TopContainer &) = delete;
-
- /**
- * @brief Start collecting
- * @param [in] values To set as values
- * @return N/A
- */
- void start_collecting(const T *values)
- {
- values_ = values;
- container_.clear();
- }
-
- /**
- * @brief Push a value to be compared for topk
- * @param [in] a A value to compare
- * @return N/A
- */
- void push(int32 a)
- {
- auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); };
- if (container_.size() <= (size_t)k_)
- {
- container_.push_back(a);
- if (container_.size() == (size_t)(k_ + 1))
- {
- std::make_heap(container_.begin(), container_.end(), comparator);
- std::pop_heap(container_.begin(), container_.end(), comparator);
- }
- }
- else if (comparator(a, container_.front()))
- {
- container_.back() = a;
- std::push_heap(container_.begin(), container_.end(), comparator);
- std::pop_heap(container_.begin(), container_.end(), comparator);
- }
- }
-
- /**
- * @brief Get sorted result from pushed values
- * @return Reference of vector with sorted values
- */
- const std::vector<int32> &sorted_result()
- {
- auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); };
- if (container_.size() <= (size_t)(k_))
- {
- std::sort(container_.begin(), container_.end(), comparator);
- }
- else
- {
- std::sort_heap(container_.begin(), container_.end() - 1, comparator);
- container_.resize(k_);
- }
- return container_;
- }
-
-private:
- int32 k_;
- std::vector<int32> container_;
- const T *values_ = nullptr;
-
- bool compare_fun(int32 a, int32 b) const
- {
- if (values_[b] < values_[a])
- {
- return true;
- }
- else if (values_[b] > values_[a])
- {
- return false;
- }
- else
- {
- return a < b;
- }
- }
-};
-
-/**
- * @brief Operates TopK operation with params
- * @param [in] row_size Size of row in data
- * @param [in] num_rows The number of rows in data
- * @param [in] data To be operated in
- * @param [in] k The top k predictions
- * @param [out] output_indexes Indexes of targets in the top k predictions
- * @param [out] output_values Values of targets in the top k predictions
- * @return N/A
- */
-template <typename T>
-void TopK(int32 row_size, int32 num_rows, const T *data, int32 k, int32 *output_indexes,
- T *output_values)
-{
- TopContainer<T> topc(k, row_size);
- for (int row = 0; row < num_rows; ++row)
- {
- const T *values_row = data + row * row_size;
- topc.start_collecting(values_row);
- for (int32 c = 0; c < row_size; ++c)
- {
- topc.push(c);
- }
-
- // Prepare output buffers.
- int32 *indexes_row = output_indexes + row * k;
- T *output_row = output_values + row * k;
- // We always assume that the output is sorted.
- const auto &top_k = topc.sorted_result();
- std::copy(top_k.begin(), top_k.end(), indexes_row);
- std::transform(top_k.begin(), top_k.end(), output_row,
- [values_row](const int32 loc) { return values_row[loc]; });
- }
-}
-
-} // namespace optimized_ops
-} // namespace rt
-} // namespace nnfw
-
-#endif // __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__
diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt
deleted file mode 100644
index 99d2028f4..000000000
--- a/libs/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-# Add all subdirectories.
-# Each library in sub-directory must have it's own CMakeLists.txt
-# to build library's binaries or to support interface.
-add_subdirectories()
diff --git a/libs/cpp14/CMakeLists.txt b/libs/cpp14/CMakeLists.txt
deleted file mode 100644
index bba9e132d..000000000
--- a/libs/cpp14/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-add_library(nnfw_lib_cpp14 INTERFACE)
-target_include_directories(nnfw_lib_cpp14 INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
diff --git a/libs/cpp14/include/cpp14/memory.h b/libs/cpp14/include/cpp14/memory.h
deleted file mode 100644
index b3e678baa..000000000
--- a/libs/cpp14/include/cpp14/memory.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * @file memory.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains @c make_unique which is not supported by C++11
- */
-#ifndef __NNFW_CPP14_MEMORY_H__
-#define __NNFW_CPP14_MEMORY_H__
-
-#include <memory>
-
-namespace nnfw
-{
-namespace cpp14
-{
-/**
- * @brief Provide @c make_unique function supported from C++14
- * @param[in] args List of arguments with which an instance of T will be constructed.
- * @return @c std::unique_ptr of an instance of type T
- */
-template <typename T, typename... Args> std::unique_ptr<T> make_unique(Args &&... args)
-{
- // NOTE std::make_unique is missing in C++11 standard
- return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
-} // napesapce cpp14
-} // namespace nnfw
-
-#endif // __NNFW_CPP14_MEMORY_H__
diff --git a/libs/misc/CMakeLists.txt b/libs/misc/CMakeLists.txt
deleted file mode 100644
index cd01695fb..000000000
--- a/libs/misc/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# Library `nnfw_lib_misc`
-set(NNFW_UTILITY_SRCS src/environment.cpp)
-list(APPEND NNFW_UTILITY_SRCS src/tensor/Shape.cpp)
-list(APPEND NNFW_UTILITY_SRCS src/tensor/NonIncreasingStride.cpp)
-list(APPEND NNFW_UTILITY_SRCS src/tensor/IndexFormatter.cpp)
-list(APPEND NNFW_UTILITY_SRCS src/tensor/Comparator.cpp)
-
-add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS})
-target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON)
-
-add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp")
-target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc)
diff --git a/libs/misc/examples/tensor_index_iterator.cpp b/libs/misc/examples/tensor_index_iterator.cpp
deleted file mode 100644
index 8a19dac87..000000000
--- a/libs/misc/examples/tensor_index_iterator.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/IndexIterator.h"
-
-#include <array>
-
-#include <iostream>
-#include <algorithm>
-
-#include <cassert>
-
-void test_iterate(void)
-{
- const nnfw::misc::tensor::Shape shape{3, 4, 7};
-
- std::array<int, 3 * 4 * 7> array;
-
- array.fill(0);
-
- using nnfw::misc::tensor::iterate;
- using nnfw::misc::tensor::Index;
-
- iterate(shape) << [&](const Index &index) {
- assert(index.rank() == shape.rank());
-
- const size_t rank = index.rank();
-
- uint32_t offset = index.at(0);
-
- for (size_t axis = 1; axis < rank; ++axis)
- {
- offset *= shape.dim(axis);
- offset += index.at(axis);
- }
-
- array[offset] += 1;
- };
-
- assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
-}
-
-int main(int argc, char **argv)
-{
- test_iterate();
-
- nnfw::misc::tensor::Shape shape{3, 4, 3, 4};
-
- std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl;
-
- nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) {
- std::cout << "rank: " << index.rank() << std::endl;
-
- for (size_t d = 0; d < index.rank(); ++d)
- {
- std::cout << " offset(" << d << ") = " << index.at(d) << std::endl;
- }
- };
-
- return 0;
-}
diff --git a/libs/misc/include/misc/EnvVar.h b/libs/misc/include/misc/EnvVar.h
deleted file mode 100644
index 47206d4c0..000000000
--- a/libs/misc/include/misc/EnvVar.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file EnvVar.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::EnvVar class
- */
-
-#ifndef __NNFW_MISC_ENV_VAR__
-#define __NNFW_MISC_ENV_VAR__
-
-#include <algorithm>
-#include <array>
-#include <cstdlib>
-#include <string>
-
-namespace nnfw
-{
-namespace misc
-{
-/**
- * @brief Class to access environment variable
- */
-class EnvVar
-{
-public:
- /**
- * @brief Construct a new EnvVar object
- * @param[in] key environment variable
- */
- EnvVar(const std::string &key)
- {
- const char *value = std::getenv(key.c_str());
- if (value == nullptr)
- {
- // An empty string is considered as an empty value
- _value = "";
- }
- else
- {
- _value = value;
- }
- }
-
- /**
- * @brief Get environment variable of string type
- * @param[in] def Default value of environment variable
- * @return Defaut value passed as a parameter when there is no environment variable,
- * otherwise the value of environment variable passed into constructor
- */
- std::string asString(const std::string &def) const
- {
- if (_value.empty())
- return def;
- return _value;
- }
-
- /**
- * @brief Get environment variable of boolean type
- * @param[in] def Default value of environment variable
- * @return Defaut value passed as a parameter when there is no environment variable,
- * otherwise the value of environment variable passed into constructor
- */
- bool asBool(bool def) const
- {
- if (_value.empty())
- return def;
- static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
- auto false_found = std::find(false_list.begin(), false_list.end(), _value);
- return (false_found == false_list.end());
- }
-
- /**
- * @brief Get environment variable of int type
- * @param[in] def Default value of environment variable
- * @return Defaut value passed as a parameter when there is no environment variable,
- * otherwise the value of environment variable passed into constructor
- */
- int asInt(int def) const
- {
- if (_value.empty())
- return def;
- return std::stoi(_value);
- }
-
-private:
- std::string _value;
-};
-
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_ENV_VAR__
diff --git a/libs/misc/include/misc/benchmark.h b/libs/misc/include/misc/benchmark.h
deleted file mode 100644
index fe5b97585..000000000
--- a/libs/misc/include/misc/benchmark.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file benchmark.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::benchmark::Accumulator class
- */
-#ifndef __NNFW_MISC_BENCHMARK_H__
-#define __NNFW_MISC_BENCHMARK_H__
-
-#include <chrono>
-
-namespace nnfw
-{
-namespace misc
-{
-// Benckmark support
-namespace benchmark
-{
-
-/**
- * @brief Class to accumulate time during benchmark
- */
-template <typename T> class Accumulator
-{
-public:
- /**
- * @brief Construct a new Accumulator object
- * @param[in] ref Object to keep time duration
- */
- Accumulator(T &ref) : _ref(ref)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Return the reference of @c ref passed to constructor
- * @return Reference of @c ref
- */
- T &operator()(void) { return _ref; }
-
-private:
- T &_ref;
-};
-
-/**
- * @brief Run passed function and returns accumulated time
- * @tparam T Period used by @c std::chrono::duration_cast
- * @tparam Callable Function type to benchmark
- * @param[in] acc Accumulated time after running @cb
- * @param[in] cb Function to run and benchmark
- * @return Accumulated time
- */
-template <typename T, typename Callable>
-Accumulator<T> &operator<<(Accumulator<T> &&acc, Callable cb)
-{
- auto begin = std::chrono::steady_clock::now();
- cb();
- auto end = std::chrono::steady_clock::now();
-
- acc() += std::chrono::duration_cast<T>(end - begin);
-
- return acc;
-}
-
-template <typename T> Accumulator<T> measure(T &out) { return Accumulator<T>(out); }
-
-} // namespace benchmark
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_BENCHMARK_H__
diff --git a/libs/misc/include/misc/environment.h b/libs/misc/include/misc/environment.h
deleted file mode 100644
index 8e6bd00d5..000000000
--- a/libs/misc/include/misc/environment.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file environment.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains utility functions and classes to access environment variables
- */
-
-#ifndef __UTIL_ENVIRONMENT_H__
-#define __UTIL_ENVIRONMENT_H__
-
-namespace nnfw
-{
-namespace misc
-{
-
-/**
- * @brief Get the environment variable of int type
- * @param[in] name Name of the environment variable
- * @param[in] defaultValue Default value when the value of environment variable does not exist
- * @return The int value of the environment variable
- */
-int get_env_int(const char *name, int defaultValue = 0);
-
-/**
- * @brief Get the environment variable of bool type
- * @param[in] name Name of the environment variable
- * @param[in] defaultValue Default value when the value of environment variable does not exist
- * @return @c 0 if the value of the environment variable is @c "0", @c 1 in case of other number
- */
-bool get_env_bool(const char *name, bool defaultValue = false);
-}
-}
-
-#include <string>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace env
-{
-/**
- * @brief Parent struct of @ref IntAccessor and @ref FloatAccessor
- * @tparam T Type of the value of environment variable
- */
-template <typename T> struct Accessor
-{
- /**
- * @brief Destroy the Accessor object
- */
- virtual ~Accessor() = default;
- /**
- * @brief Read the value of environment variable
- * @param[out] out The value of environment variable
- * @return @c true if accessing environment variable is successful,
- * @c false if there is exist no such environment variable
- */
- virtual bool access(T &out) const = 0;
-};
-
-/**
- * @brief Class to read int environment variable
- */
-class IntAccessor : public Accessor<int>
-{
-public:
- /**
- * @brief Construct a new IntAccessor object
- * @param[in] tag Name of environment variable
- */
- IntAccessor(const std::string &tag);
-
-public:
- /**
- * @brief Read the value of environment variable
- * @param[out] out The value of environment variable
- * @return @c true if accessing environment variable is successful,
- * @c false if there is exist no such environment variable
- */
- bool access(int &out) const override;
-
-private:
- std::string _tag;
-};
-
-/**
- * @brief Class to read float environment variable
- */
-class FloatAccessor : public Accessor<float>
-{
-public:
- /**
- * @brief Construct a new FloatAccessor object
- * @param[in] tag Name of environment variable
- */
- FloatAccessor(const std::string &tag);
-
-public:
- /**
- * @brief Read the value of environment variable
- * @param[out] out The value of environment variable
- * @return @c true if accessing environment variable is successful,
- * @c false if there is exist no such environment variable
- */
- bool access(float &out) const override;
-
-private:
- std::string _tag;
-};
-
-} // namespace env
-} // namespace misc
-} // namespace nnfw
-
-#endif // __UTIL_ENVIRONMENT_H__
diff --git a/libs/misc/include/misc/feature/Index.h b/libs/misc/include/misc/feature/Index.h
deleted file mode 100644
index a361d8dd2..000000000
--- a/libs/misc/include/misc/feature/Index.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Index.h
- * @brief This file contains Index class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FEATURE_INDEX_H__
-#define __NNFW_MISC_FEATURE_INDEX_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace feature
-{
-
-/**
- * @brief Class to have the index information for calculating the offset.
- */
-class Index
-{
-public:
- /**
- * @brief Construct Index object using default constrcutor
- */
- Index() = default;
-
-public:
- /**
- * @brief Construct Index object with three indexes of dimensions
- * @param[in] ch The depth index
- * @param[in] row The heigth index
- * @param[in] col The width index
- */
- Index(int32_t ch, int32_t row, int32_t col) : _batch{1}, _ch{ch}, _row{row}, _col{col}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct Index object with four indexes of dimensions
- * @param[in] batch The batch index
- * @param[in] ch The depth index
- * @param[in] row The height index
- * @param[in] col The width index
- */
- Index(int32_t batch, int32_t ch, int32_t row, int32_t col)
- : _batch{batch}, _ch{ch}, _row{row}, _col{col}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get the batch index
- * @return The batch index
- */
- int32_t batch(void) const { return _batch; }
- /**
- * @brief Get the depth index
- * @return The depth index
- */
- int32_t ch(void) const { return _ch; }
- /**
- * @brief Get the height index
- * @return The height index
- */
- int32_t row(void) const { return _row; }
- /**
- * @brief Get the width index
- * @return The width index
- */
- int32_t col(void) const { return _col; }
-
-public:
- /**
- * @brief Get the batch index as the lvalue reference
- * @return The reference of the batch value
- */
- int32_t &batch(void) { return _batch; }
- /**
- * @brief Get the depth index as the lvalue reference
- * @return The reference of the depth value
- */
- int32_t &ch(void) { return _ch; }
- /**
- * @brief Get the height index as the lvalue reference
- * @return The reference of the height value
- */
- int32_t &row(void) { return _row; }
- /**
- * @brief Get the width index as the lvalue reference
- * @return The reference of the width value
- */
- int32_t &col(void) { return _col; }
-
-private:
- /**
- * @brief The batch index
- */
- int32_t _batch;
- /**
- * @brief The depth index
- */
- int32_t _ch;
- /**
- * @brief The height index
- */
- int32_t _row;
- /**
- * @brief The width index
- */
- int32_t _col;
-};
-
-} // namespace feature
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_INDEX_H__
diff --git a/libs/misc/include/misc/feature/IndexIterator.h b/libs/misc/include/misc/feature/IndexIterator.h
deleted file mode 100644
index 1cf675526..000000000
--- a/libs/misc/include/misc/feature/IndexIterator.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IndexIterator.h
- * @brief This file contains IndexIterator class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__
-#define __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__
-
-#include "misc/feature/Shape.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace feature
-{
-
-/**
- * @brief Class to iterate Callable with Index of feature
- */
-class IndexIterator
-{
-public:
- /**
- * @brief Construct IndexIterator object with Shape of feature
- * @param[in] shape Shape reference of feature
- */
- IndexIterator(const Shape &shape) : _shape{shape}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Call a function iterated
- * @param[in] cb A callback function
- * @return Current IndexIterator object
- */
- template <typename Callable> IndexIterator &iter(Callable cb)
- {
- for (int32_t batch = 0; batch < _shape.N; ++batch)
- {
- for (int32_t ch = 0; ch < _shape.C; ++ch)
- {
- for (int32_t row = 0; row < _shape.H; ++row)
- {
- for (int32_t col = 0; col < _shape.W; ++col)
- {
- cb(batch, ch, row, col);
- }
- }
- }
- }
-
- return (*this);
- }
-
-private:
- /**
- * @brief Shape for feature
- */
- const Shape _shape;
-};
-
-/**
- * @brief Create an object of IndexIterator for feature
- * @param[in] Shape reference of feature
- * @return Created IndexIterator object
- */
-static inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; }
-
-/**
- * @brief Call a function iterated using IndexIterator of feature
- * Overloaded operator<<
- * @param[in] it An IndexIterator reference
- * @param[in] cb A callback function
- * @return created IndexIterator object
- */
-template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb)
-{
- return it.iter(cb);
-}
-
-} // namespace feature
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__
diff --git a/libs/misc/include/misc/feature/Object.h b/libs/misc/include/misc/feature/Object.h
deleted file mode 100644
index 7af0e28f4..000000000
--- a/libs/misc/include/misc/feature/Object.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Object.h
- * @brief This file contains Object class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FEATURE_OBJECT_H__
-#define __NNFW_MISC_FEATURE_OBJECT_H__
-
-#include "misc/feature/Shape.h"
-#include "misc/feature/Index.h"
-#include "misc/feature/Reader.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace feature
-{
-
-/**
- * @brief Class to have information of the operand for feature
- */
-template <typename T> class Object final : public Reader<T>
-{
-public:
- using Generator = std::function<T(const Shape &shape, const Index &index)>;
-
-public:
- /**
- * @brief Construct Object object with Shape of feature and set value used by Generator
- * @param[in] shape Reference of Shape for feature
- * @param[in] fn A function to set values of operand tensor
- */
- Object(const Shape &shape, const Generator &fn) : _shape{shape}
- {
- _value.resize(_shape.C * _shape.H * _shape.W);
-
- for (int32_t ch = 0; ch < _shape.C; ++ch)
- {
- for (int32_t row = 0; row < _shape.H; ++row)
- {
- for (int32_t col = 0; col < _shape.W; ++col)
- {
- _value.at(offsetOf(ch, row, col)) = fn(_shape, Index{ch, row, col});
- }
- }
- }
- }
-
-public:
- /**
- * @brief Get Shape of feature as the reference
- * @return The reference of the width value
- */
- const Shape &shape(void) const { return _shape; }
-
-public:
- /**
- * @brief Get the value used by three indexes
- * @param[in] ch The depth index
- * @param[in] row The height index
- * @param[in] col The width index
- * @return The value at the offset
- */
- T at(uint32_t ch, uint32_t row, uint32_t col) const override
- {
- return _value.at(offsetOf(ch, row, col));
- }
-
-private:
- /**
- * @brief Get the offset value at three indexes
- * @param[in] ch The depth index
- * @param[in] row The height index
- * @param[in] col The width index
- * @return The offset value
- */
- uint32_t offsetOf(uint32_t ch, uint32_t row, uint32_t col) const
- {
- return ch * _shape.H * _shape.W + row * _shape.W + col;
- }
-
-private:
- /**
- * @brief Shape of operand
- */
- Shape _shape;
- /**
- * @brief The tensor vector of operand
- */
- std::vector<T> _value;
-};
-
-} // namespace feature
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_OBJECT_H__
diff --git a/libs/misc/include/misc/feature/Reader.h b/libs/misc/include/misc/feature/Reader.h
deleted file mode 100644
index b09209789..000000000
--- a/libs/misc/include/misc/feature/Reader.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Reader.h
- * @brief This file contains Reader class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FEATURE_READER_H__
-#define __NNFW_MISC_FEATURE_READER_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace feature
-{
-
-/**
- * @brief Class reads values of feature
- * The interface class
- */
-template <typename T> struct Reader
-{
- /**
- * @brief Destruct Reader object using default destructor
- */
- virtual ~Reader() = default;
-
- /**
- * @brief Get the value used by three indexes
- * @param[in] ch The depth index
- * @param[in] row The height index
- * @param[in] col The width index
- * @return The value at the offset
- */
- virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0;
- /**
- * @brief Get the value used by four indexes
- * @param[in] batch The batch index
- * @param[in] ch The depth index
- * @param[in] row The height index
- * @param[in] col The width index
- * @return The value at the offset
- */
- virtual T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const = 0;
-};
-
-} // namespace feature
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_READER_H__
diff --git a/libs/misc/include/misc/feature/Shape.h b/libs/misc/include/misc/feature/Shape.h
deleted file mode 100644
index 09881f58b..000000000
--- a/libs/misc/include/misc/feature/Shape.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Shape.h
- * @brief This file contains Shape class for feature
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FEATURE_SHAPE_H__
-#define __NNFW_MISC_FEATURE_SHAPE_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace feature
-{
-
-/**
- * @brief Structure to have values of dimensions for feature
- */
-struct Shape
-{
- int32_t N; /**< The batch value */
- int32_t C; /**< The depth value */
- int32_t H; /**< The height value */
- int32_t W; /**< The width value */
-
- /**
- * @brief Construct Shape object using default constrcutor
- */
- Shape() = default;
- /**
- * @brief Construct Shape object with three values of dimensions
- * @param[in] depth The depth value
- * @param[in] height The height value
- * @param[in] width The width value
- */
- Shape(int32_t depth, int32_t height, int32_t width) : N{1}, C{depth}, H{height}, W{width}
- {
- // DO NOTHING
- }
- /**
- * @brief Construct Shape object with four values of dimensions
- * @param[in] batch The batch value
- * @param[in] depth The depth value
- * @param[in] height The height value
- * @param[in] width The width value
- */
- Shape(int32_t batch, int32_t depth, int32_t height, int32_t width)
- : N{batch}, C{depth}, H{height}, W{width}
- {
- // DO NOTHING
- }
-};
-
-} // namespace feature
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_H__
diff --git a/libs/misc/include/misc/feature/TextFormatter.h b/libs/misc/include/misc/feature/TextFormatter.h
deleted file mode 100644
index e053f1c61..000000000
--- a/libs/misc/include/misc/feature/TextFormatter.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TextFormatter.h
- * @brief This file contains TextFormatter class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FEATURE_TEXT_FORMATTER_H__
-#define __NNFW_MISC_FEATURE_TEXT_FORMATTER_H__
-
-#include "misc/feature/Shape.h"
-#include "misc/feature/Reader.h"
-
-#include <ostream>
-#include <iomanip>
-#include <limits>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace feature
-{
-
-/**
- * @brief Class to print operand of feature to ostream in the given string format
- */
-template <typename T> class TextFormatter
-{
-public:
- /**
- * @brief Construct TextFormatter object with an operand's information.
- * @param[in] shape The shape of an operand
- * @param[in] data The data of an operand
- */
- TextFormatter(const Shape &shape, const Reader<T> &data) : _shape(shape), _data(data)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get Shape of feature as the lvalue reference
- * @return Shape of feature
- */
- const Shape &shape(void) const { return _shape; }
- /**
- * @brief Get Reader<T> that can read the data of an operand
- * @return Reader<T>
- */
- const Reader<T> &data(void) const { return _data; }
-
-private:
- /**
- * @brief Shape of feature
- */
- const Shape &_shape;
- /**
- * @brief Reader<T> that can read the data of an operand
- */
- const Reader<T> &_data;
-};
-
-/**
- * @brief Print operand of feature
- * @param[in] os Standard output stream
- * @param[in] fmt TextFormatter to print information of an operand
- * @return Standard output stream
- */
-template <typename T> std::ostream &operator<<(std::ostream &os, const TextFormatter<T> &fmt)
-{
- const auto &shape = fmt.shape();
-
- for (uint32_t ch = 0; ch < shape.C; ++ch)
- {
- os << " Channel " << ch << ":" << std::endl;
- for (uint32_t row = 0; row < shape.H; ++row)
- {
- os << " ";
- for (uint32_t col = 0; col < shape.W; ++col)
- {
- const auto value = fmt.data().at(ch, row, col);
- os << std::right;
- os << std::fixed;
- os << std::setw(std::numeric_limits<T>::digits10 + 2);
- os << std::setprecision(5);
- os << value;
- os << " ";
- }
- os << std::endl;
- }
- }
-
- return os;
-}
-
-} // namespace feature
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_TEXT_FORMATTER_H__
diff --git a/libs/misc/include/misc/fp32.h b/libs/misc/include/misc/fp32.h
deleted file mode 100644
index c310402ba..000000000
--- a/libs/misc/include/misc/fp32.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file fp32.h
- * @brief This file contains functions to compare float values
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_FP32_H__
-#define __NNFW_MISC_FP32_H__
-
-#include <cmath>
-#include <cfloat>
-#include <algorithm>
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace fp32
-{
-
-/**
- * @brief Get the difference between two float values as a relative value.
- * @param[in] lhs A float value to be compared
- * @param[in] rhs A float value to be compared
- * @return A relative value of difference between two float values.
- */
-inline float relative_diff(float lhs, float rhs)
-{
- const auto diff = std::fabs(lhs - rhs);
- const auto base = std::max(std::fabs(lhs), std::fabs(rhs));
-
- return diff / base;
-}
-
-/**
- * @brief Verify that an obtained float value is equal to the expected float value
- * by using FLT_EPSILON
- * @param[in] expected An expected float value to be compared
- * @param[in] obtained An obtained float value to be compared
- * @param[in] tolerance A tolerance value
- * @return @c true if both values are equal, otherwise @c false
- */
-inline bool epsilon_equal(float expected, float obtained, uint32_t tolerance = 1)
-{
- if (std::isnan(expected) && std::isnan(obtained))
- {
- return true;
- }
-
- // Let's use relative epsilon comparision
- const auto diff = std::fabs(expected - obtained);
- const auto max = std::max(std::fabs(expected), std::fabs(obtained));
-
- return diff <= (max * FLT_EPSILON * tolerance);
-}
-
-/**
- * @brief Verify that an obtained float value is equal to the expected float value
- * by comparing absolute tolerance value
- * @param[in] expected An expected float value to be compared
- * @param[in] obtained An obtained float value to be compared
- * @param[in] tolerance A tolerance value
- * @return @c true if both values are equal, otherwise @c false
- */
-inline bool absolute_epsilon_equal(float expected, float obtained, float tolerance = 0.001)
-{
- if (std::isnan(expected) && std::isnan(obtained))
- {
- return true;
- }
-
- // Let's use absolute epsilon comparision
- const auto diff = std::fabs(expected - obtained);
-
- return diff <= tolerance;
-}
-
-} // namespace fp32
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FP32_H__
diff --git a/libs/misc/include/misc/kernel/IndexIterator.h b/libs/misc/include/misc/kernel/IndexIterator.h
deleted file mode 100644
index 59e0f0095..000000000
--- a/libs/misc/include/misc/kernel/IndexIterator.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IndexIterator.h
- * @brief This file contains IndexIterator class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_KERNEL_INDEX_ITERATOR_H__
-#define __NNFW_MISC_KERNEL_INDEX_ITERATOR_H__
-
-#include "misc/kernel/Shape.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace kernel
-{
-
-/**
- * @brief Class to iterate Callable with Index of kernel
- */
-class IndexIterator
-{
-public:
- /**
- * @brief Construct IndexIterator object with Shape of kernel
- * @param[in] shape Shape reference of feature
- */
- IndexIterator(const Shape &shape) : _shape{shape}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Call a function iterated
- * @param[in] cb A callback function
- * @return Current IndexIterator object
- */
- template <typename Callable> IndexIterator &iter(Callable cb)
- {
- for (int32_t nth = 0; nth < _shape.N; ++nth)
- {
- for (int32_t ch = 0; ch < _shape.C; ++ch)
- {
- for (int32_t row = 0; row < _shape.H; ++row)
- {
- for (int32_t col = 0; col < _shape.W; ++col)
- {
- cb(nth, ch, row, col);
- }
- }
- }
- }
-
- return (*this);
- }
-
-private:
- const Shape _shape; /**< Shape for kernel */
-};
-
-/**
- * @brief Create an object of IndexIterator for kernel
- * @param[in] shape reference of feature
- * @return Created IndexIterator object
- */
-inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; }
-
-/**
- * @brief Call a function iterated using IndexIterator of kernel
- * Overloaded operator<<
- * @param[in] it An IndexIterator reference
- * @param[in] cb A callback function
- * @return Created IndexIterator object
- */
-template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb)
-{
- return it.iter(cb);
-}
-
-} // namespace kernel
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_INDEX_ITERATOR_H__
diff --git a/libs/misc/include/misc/kernel/RandomObject.h b/libs/misc/include/misc/kernel/RandomObject.h
deleted file mode 100644
index 4b58b0c7f..000000000
--- a/libs/misc/include/misc/kernel/RandomObject.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file RandomObject.h
- * @brief This file contains RandomObject class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_KERNEL_RANDOM_OBJECT_H__
-#define __NNFW_MISC_KERNEL_RANDOM_OBJECT_H__
-
-#include "misc/kernel/Shape.h"
-#include "misc/kernel/Reader.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace kernel
-{
-
-template <typename T> class RandomObject final : public Reader<T>
-{
-public:
- RandomObject(const Shape &shape) : _shape{shape}
- {
- const uint32_t size = _shape.N * _shape.C * _shape.H * _shape.W;
-
- // TODO Use random number
- for (uint32_t off = 0; off < size; ++off)
- {
- _value.emplace_back(static_cast<float>(off));
- }
- }
-
-public:
- const Shape &shape(void) const { return _shape; }
-
-public:
- T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override
- {
- uint32_t index = 0;
-
- index += nth * _shape.C * _shape.H * _shape.W;
- index += ch * _shape.H * _shape.W;
- index += row * _shape.W;
- index += col;
-
- return _value.at(index);
- }
-
-private:
- const Shape _shape;
- std::vector<T> _value;
-};
-
-} // namespace kernel
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_KERNEL_RANDOM_OBJECT_H__
diff --git a/libs/misc/include/misc/kernel/Reader.h b/libs/misc/include/misc/kernel/Reader.h
deleted file mode 100644
index 019c809ee..000000000
--- a/libs/misc/include/misc/kernel/Reader.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Reader.h
- * @brief This file contains Reader structure
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_KERNEL_READER_H__
-#define __NNFW_MISC_KERNEL_READER_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace kernel
-{
-
-/**
- * @brief Structure to Reader
- */
-template <typename T> struct Reader
-{
- /**
- * @brief Destroy the Reader object as default
- */
- virtual ~Reader() = default;
-
- /**
- * @brief Get the value used by four indexes
- * @param[in] nth The kernel index
- * @param[in] ch The channel index
- * @param[in] row The row index
- * @param[in] col The column index
- * @return The value at the offset
- */
- virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0;
-};
-
-} // namespace kernel
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_KERNEL_READER_H__
diff --git a/libs/misc/include/misc/kernel/Shape.h b/libs/misc/include/misc/kernel/Shape.h
deleted file mode 100644
index 27d6a8bf0..000000000
--- a/libs/misc/include/misc/kernel/Shape.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Shape.h
- * @brief This file contains Shape structure
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_KERNEL_SHAPE_H__
-#define __NNFW_MISC_KERNEL_SHAPE_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace kernel
-{
-
-/**
- * @brief Structure to Shape
- */
-struct Shape
-{
- int32_t N; /**< The kernel index */
- int32_t C; /**< The channel index */
- int32_t H; /**< The height index */
- int32_t W; /**< The width index */
-
- /**
- * @brief Construct a new Shape object as default
- */
- Shape() = default;
-
- /**
- * @brief Construct a new Shape object with parameters
- * @param[in] count The kernel index
- * @param[in] depth The channel index
- * @param[in] height The height index
- * @param[in] width The width index
- */
- Shape(int32_t count, int32_t depth, int32_t height, int32_t width)
- : N{count}, C{depth}, H{height}, W{width}
- {
- // DO NOTHING
- }
-};
-
-} // namespace kernel
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_KERNEL_SHAPE_H__
diff --git a/libs/misc/include/misc/matrix/IndexIterator.h b/libs/misc/include/misc/matrix/IndexIterator.h
deleted file mode 100644
index 742ed3a65..000000000
--- a/libs/misc/include/misc/matrix/IndexIterator.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IndexIterator.h
- * @brief This file contains IndexIterator class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_MATRIX_INDEX_ITERATOR_H__
-#define __NNFW_MISC_MATRIX_INDEX_ITERATOR_H__
-
-#include "misc/matrix/Shape.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace matrix
-{
-
-/**
- * @brief Class to iterate Callable with Index of matrix
- */
-class IndexIterator
-{
-public:
- /**
- * @brief Construct IndexIterator object with Shape of matrix
- * @param[in] shape Shape reference of matrix
- */
- IndexIterator(const Shape &shape) : _shape{shape}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Call a function iterated
- * @param[in] cb A callback function
- * @return Current IndexIterator object
- */
- template <typename Callable> IndexIterator &iter(Callable cb)
- {
- for (uint32_t row = 0; row < _shape.H; ++row)
- {
- for (uint32_t col = 0; col < _shape.W; ++col)
- {
- cb(row, col);
- }
- }
-
- return (*this);
- }
-
-private:
- /**
- * @brief Shape for matrix
- */
- const Shape _shape;
-};
-
-/**
- * @brief Create an object of IndexIterator for matrix
- * @param[in] Shape reference of matrix
- * @return Created IndexIterator object
- */
-inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; }
-
-/**
- * @brief Call a function iterated using IndexIterator of matrix
- * Overloaded operator<<
- * @param[in] it An IndexIterator reference
- * @param[in] cb A callback function
- * @return created IndexIterator object
- */
-template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb)
-{
- return it.iter(cb);
-}
-
-} // namespace matrix
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_MATRIX_INDEX_ITERATOR_H__
diff --git a/libs/misc/include/misc/matrix/Reader.h b/libs/misc/include/misc/matrix/Reader.h
deleted file mode 100644
index ea222c9d1..000000000
--- a/libs/misc/include/misc/matrix/Reader.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Reader.h
- * @brief This file contains Reader class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_MATRIX_READER_H__
-#define __NNFW_MISC_MATRIX_READER_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace matrix
-{
-
-/**
- * @brief Class reads values of matrix
- * The interface class
- */
-template <typename T> struct Reader
-{
- /**
- * @brief Destruct Reader object using default destructor
- */
- virtual ~Reader() = default;
-
- /**
- * @brief Get the value used by two indexes
- * @param[in] row The height index
- * @param[in] col The width index
- * @return The value at the offset
- */
- virtual T at(uint32_t row, uint32_t col) const = 0;
-};
-
-} // namespace matrix
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_MATRIX_READER_H__
diff --git a/libs/misc/include/misc/matrix/Shape.h b/libs/misc/include/misc/matrix/Shape.h
deleted file mode 100644
index 8cbcc1e12..000000000
--- a/libs/misc/include/misc/matrix/Shape.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Shape.h
- * @brief This file contains Shape class for matrix
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_MATRIX_SHAPE_H__
-#define __NNFW_MISC_MATRIX_SHAPE_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace matrix
-{
-
-/**
- * @brief Structure to have values of dimensions for matrix
- */
-struct Shape
-{
- int32_t H; /**< The height value */
- int32_t W; /**< The width value */
-
- /**
- * @brief Construct Shape object using default constrcutor
- */
- Shape() = default;
-
- /**
- * @brief Construct Shape object with two values of dimensions
- * @param[in] height The height value
- * @param[in] width The width value
- */
- Shape(int32_t height, int32_t width) : H{height}, W{width}
- {
- // DO NOTHING
- }
-};
-
-} // namespace matrix
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_MATRIX_SHAPE_H__
diff --git a/libs/misc/include/misc/tensor/Comparator.h b/libs/misc/include/misc/tensor/Comparator.h
deleted file mode 100644
index 80f53043c..000000000
--- a/libs/misc/include/misc/tensor/Comparator.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Comparator.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Comparator class
- */
-
-#ifndef __NNFW_MISC_TENSOR_COMPARATOR_H__
-#define __NNFW_MISC_TENSOR_COMPARATOR_H__
-
-#include "misc/tensor/Index.h"
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Reader.h"
-#include "misc/tensor/Diff.h"
-
-#include <functional>
-
-#include <vector>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to compare two tensors (expected and obtained to compare)
- */
-class Comparator
-{
-public:
- /**
- * @brief Construct a new @c Comparator object
- * @param[in] fn Function that compares two float values
- */
- Comparator(const std::function<bool(float lhs, float rhs)> &fn) : _compare_fn{fn}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Struct to observe comparison results
- */
- struct Observer
- {
- /**
- * @brief Get notification of comparison result at every index of two tensors
- * @param[in] index Index of tensors compared
- * @param[in] expected Expected value of element at @c index
- * @param[in] obtained Obtained value of element at @c index
- * @return N/A
- */
- virtual void notify(const Index &index, float expected, float obtained) = 0;
- };
-
-public:
- /**
- * @brief Compare two tensors
- * @param[in] shape Shape of two tensors
- * @param[in] expected @c Reader<float> object that accesses expected tensor
- * @param[in] obtained @c Reader<float> object that accesses obtained tensor
- * @param[in] observer @c Observer notified of expected value and obtained value at every index
- * @return @c std::vector<Diff<float>> containing information of failed comparison
- */
- // NOTE Observer should live longer than comparator
- std::vector<Diff<float>> compare(const Shape &shape, const Reader<float> &expected,
- const Reader<float> &obtained,
- Observer *observer = nullptr) const;
-
-private:
- std::function<bool(float lhs, float rhs)> _compare_fn;
-};
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_COMPARATOR_H__
diff --git a/libs/misc/include/misc/tensor/Diff.h b/libs/misc/include/misc/tensor/Diff.h
deleted file mode 100644
index c41a97987..000000000
--- a/libs/misc/include/misc/tensor/Diff.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Diff.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Diff struct
- */
-
-#ifndef __NNFW_MISC_TENSOR_DIFF_H__
-#define __NNFW_MISC_TENSOR_DIFF_H__
-
-#include "misc/tensor/Index.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Struct to have information after comparing two elements of two tensors
- */
-template <typename T> struct Diff
-{
- Index index; /**< Index of elements in two tensors, which turn out to be different */
-
- T expected; /**< Expected value of element of first tensor */
- T obtained; /**< Obtained value of element of second tensor */
-
- /**
- * @brief Construct a new @c Diff object
- * @param[in] i Initial value of index
- */
- Diff(const Index &i) : index(i)
- {
- // DO NOTHING
- }
-
- /**
- * @brief Construct a new @c Diff object
- * @param[in] i Index value
- * @param[in] e Expected value of element of first tensor
- * @param[in] o Obtained value of element of second tensor
- */
- Diff(const Index &i, const T &e, const T &o) : index(i), expected{e}, obtained{o}
- {
- // DO NOTHING
- }
-};
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_DIFF_H__
diff --git a/libs/misc/include/misc/tensor/Index.h b/libs/misc/include/misc/tensor/Index.h
deleted file mode 100644
index a08d7099e..000000000
--- a/libs/misc/include/misc/tensor/Index.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Index.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Index struct
- */
-#ifndef __NNFW_MISC_TENSOR_INDEX_H__
-#define __NNFW_MISC_TENSOR_INDEX_H__
-
-#include <cstdint>
-#include <cstddef>
-
-#include <vector>
-#include <initializer_list>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Struct to represent index of each dimension of a tensor
- */
-struct Index
-{
-public:
- /**
- * @brief Construct a new @c Index object
- * @param[in] rank Rank of a tensor
- */
- Index(size_t rank) { _offsets.resize(rank); }
-
-public:
- /**
- * @brief Construct a new @c Index object
- * @param[in] offsets Rank of a tensor of @c std::initializer_list<int32_t> type
- */
- Index(std::initializer_list<int32_t> offsets) : _offsets{offsets}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get the rank
- * @return Rank that this @c Index object can handle
- */
- size_t rank(void) const { return _offsets.size(); }
-
-public:
- /**
- * @brief Get the index n'th dimension
- * @param[in] n Dimension
- * @return index of n'th dimension
- */
- int32_t at(size_t n) const { return _offsets.at(n); }
-
- /**
- * @brief Get the reference of the index n'th dimension
- * @param[in] n Dimension
- * @return reference of index of n'th dimension
- */
- int32_t &at(size_t n) { return _offsets.at(n); }
-
-private:
- std::vector<int32_t> _offsets;
-};
-
-/**
- * @brief Copy an @c Index with reversed order
- * @param[in] origin @c Index object to copy
- * @return an @c Index object with reversed order
- * @note This is used to convert NNAPI tensor index to ARM tensor index or vice versa
- */
-inline static Index copy_reverse(const Index &origin)
-{
- size_t rank = origin.rank();
- Index target(rank);
- for (int i = 0; i < rank; i++)
- target.at(i) = origin.at(rank - 1 - i);
- return target;
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_INDEX_H__
diff --git a/libs/misc/include/misc/tensor/IndexEnumerator.h b/libs/misc/include/misc/tensor/IndexEnumerator.h
deleted file mode 100644
index 4912ea289..000000000
--- a/libs/misc/include/misc/tensor/IndexEnumerator.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IndexEnumerator.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::IndexEnumerator class
- */
-
-#ifndef __NNFW_MISC_TENSOR_INDEX_ENUMERATOR_H__
-#define __NNFW_MISC_TENSOR_INDEX_ENUMERATOR_H__
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-/**
- * @brief Class to enumerate index of a tensor
- *
- */
-class IndexEnumerator
-{
-public:
- /**
- * @brief Construct a new @c IndexEnumerator object
- * @param[in] shape Shape of tensor of which index will be enumerate
- */
- explicit IndexEnumerator(const Shape &shape) : _shape(shape), _index(shape.rank()), _cursor(0)
- {
- const size_t rank = _shape.rank();
-
- for (size_t axis = 0; axis < rank; ++axis)
- {
- _index.at(axis) = 0;
- }
-
- for (_cursor = 0; _cursor < rank; ++_cursor)
- {
- if (_index.at(_cursor) < _shape.dim(_cursor))
- {
- break;
- }
- }
- }
-
-public:
- /**
- * @brief Prevent constructing @c IndexEnumerator object by using R-value reference
- */
- IndexEnumerator(IndexEnumerator &&) = delete;
- /**
- * @brief Prevent copy constructor
- */
- IndexEnumerator(const IndexEnumerator &) = delete;
-
-public:
- /**
- * @brief Check if more enumeration is available
- * @return @c true if more @c advance() is available, otherwise @c false
- */
- bool valid(void) const { return _cursor < _shape.rank(); }
-
-public:
- /**
- * @brief Get the current index to enumerate
- * @return Current index
- */
- const Index &curr(void) const { return _index; }
-
-public:
- /**
- * @brief Advance index by +1
- */
- void advance(void)
- {
- const size_t rank = _shape.rank();
-
- // Find axis to be updated
- while ((_cursor < rank) && !(_index.at(_cursor) + 1 < _shape.dim(_cursor)))
- {
- ++_cursor;
- }
-
- if (_cursor == rank)
- {
- return;
- }
-
- // Update index
- _index.at(_cursor) += 1;
-
- for (size_t axis = 0; axis < _cursor; ++axis)
- {
- _index.at(axis) = 0;
- }
-
- // Update cursor
- _cursor = 0;
- }
-
-public:
- const Shape _shape; //!< Shape to enumerate
-
-private:
- size_t _cursor;
- Index _index;
-};
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_INDEX_ENUMERATOR_H__
diff --git a/libs/misc/include/misc/tensor/IndexFormatter.h b/libs/misc/include/misc/tensor/IndexFormatter.h
deleted file mode 100644
index 7ae34eec1..000000000
--- a/libs/misc/include/misc/tensor/IndexFormatter.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IndexFormatter.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::IndexFormatter class
- */
-
-#ifndef __NNFW_MISC_TENSOR_INDEX_FORMATTER_H__
-#define __NNFW_MISC_TENSOR_INDEX_FORMATTER_H__
-
-#include "misc/tensor/Index.h"
-
-#include <ostream>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to send @c Index object to output stream
- */
-class IndexFormatter
-{
-public:
- /**
- * @brief Construct a new @c IndexFormatter object
- * @param[in] index index to be sent to output stream
- */
- IndexFormatter(const nnfw::misc::tensor::Index &index) : _index(index)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get an @c Index object
- * @return @c Index object previously passed to the constructor
- */
- const nnfw::misc::tensor::Index &index(void) const { return _index; }
-
-private:
- const nnfw::misc::tensor::Index &_index;
-};
-
-/**
- * @brief Send @c IndexFormatter object to output stream
- * @param[in] os Output stream
- * @param[in] fmt @c IndexFormatter object that is sent to output stream
- * @return Output stream
- */
-std::ostream &operator<<(std::ostream &os, const IndexFormatter &fmt);
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_INDEX_FORMATTER_H__
diff --git a/libs/misc/include/misc/tensor/IndexIterator.h b/libs/misc/include/misc/tensor/IndexIterator.h
deleted file mode 100644
index f6428e19e..000000000
--- a/libs/misc/include/misc/tensor/IndexIterator.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file IndexIterator.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::IndexIterator class and
- * helper function and operator
- */
-#ifndef __NNFW_MISC_TENSOR_INDEX_ITERATOR_H__
-#define __NNFW_MISC_TENSOR_INDEX_ITERATOR_H__
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-#include "misc/tensor/IndexEnumerator.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to iterate indexes available for given shape
- */
-class IndexIterator
-{
-public:
- /**
- * @brief Construct a new @c IndexIterator object
- * @param[in] shape Shape of tensor of which index will be iterated
- */
- IndexIterator(const Shape &shape) : _shape(shape)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Construct a new IndexIterator object using reference
- * @param[in] IndexIterator @c IndexIterator object to move
- */
- IndexIterator(IndexIterator &&) = default;
-
- /**
- * @brief Prevent copy constructor
- */
- IndexIterator(const IndexIterator &) = delete;
-
-public:
- /**
- * @brief Iterate all available indexes and run a function for each index
- * @param[in] fn Function that requires an index as a parameter.
- * @return @c IndexIterator object
- */
- template <typename Callable> IndexIterator &iter(Callable fn)
- {
- for (IndexEnumerator e{_shape}; e.valid(); e.advance())
- {
- fn(e.curr());
- }
-
- return (*this);
- }
-
-private:
- const Shape &_shape;
-};
-
-/**
- * @brief Get an @c IndexItator object
- * @param[in] shape Shape of tensor of which index will be iterated
- * @return @c IndexIterator object
- */
-inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; }
-
-/**
- * @brief Iterate all indexes and apply a function
- * @param[in] it @c IndexIterator object that is constructed with a tensor shape
- * @param[in] cb A function that will receive a specific index.
- * Inside the function, the index is used to manipulate tensor element.
- * @return @c IndexIterator object
- */
-template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb)
-{
- return it.iter(cb);
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_INDEX_ITERATOR_H__
diff --git a/libs/misc/include/misc/tensor/NonIncreasingStride.h b/libs/misc/include/misc/tensor/NonIncreasingStride.h
deleted file mode 100644
index e7ad0857b..000000000
--- a/libs/misc/include/misc/tensor/NonIncreasingStride.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file NonIncreasingStride.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::NonIncreasingStride class
- */
-#ifndef __NNFW_MISC_TENSOR_NON_INCREASING_STRIDE_H__
-#define __NNFW_MISC_TENSOR_NON_INCREASING_STRIDE_H__
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to represent strides where stride[N-1] >= stride[N] holds for all N < rank
- */
-class NonIncreasingStride
-{
-public:
- /**
- * @brief Initialize the stride data using @c Shape
- * @param[in] shape to build stride info
- * @return N/A
- */
- void init(const Shape &shape)
- {
- _stride.resize(shape.rank());
- _stride.at(shape.rank() - 1) = 1;
-
- for (uint32_t axis = shape.rank() - 1; axis > 0; --axis)
- {
- _stride.at(axis - 1) = _stride.at(axis) * shape.dim(axis);
- }
- }
-
-public:
- /**
- * @brief Get an stride value for specific axis
- * @param[in] axis Axis of stride
- * @return The value of stride
- */
- uint32_t at(uint32_t axis) const { return _stride.at(axis); }
-
-public:
- /**
- * @brief Get the 1-D offset of specified index for n-D tensor
- * @param index @c Index object
- * @return 1-D offset of index
- */
- uint32_t offset(const Index &index) const;
-
-private:
- std::vector<uint32_t> _stride;
-};
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_NON_INCREASING_STRIDE_H__
diff --git a/libs/misc/include/misc/tensor/Object.h b/libs/misc/include/misc/tensor/Object.h
deleted file mode 100644
index 83fbc0bd1..000000000
--- a/libs/misc/include/misc/tensor/Object.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Object.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Object class
- */
-
-#ifndef __NNFW_MISC_TENSOR_OBJECT_H__
-#define __NNFW_MISC_TENSOR_OBJECT_H__
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-#include "misc/tensor/IndexIterator.h"
-#include "misc/tensor/NonIncreasingStride.h"
-#include "misc/tensor/Reader.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to build a tensor using specific generator
- * @tparam T Type of tensor element
- */
-
-template <typename T> class Object final : public Reader<T>
-{
-public:
- /**
- * @brief Function to generate tensor element
- */
- using Generator = std::function<T(const Shape &shape, const Index &index)>;
-
-public:
- /**
- * @brief Construct a new @c Object object
- * @param[in] shape Tensor shape
- * @param[in] fn Function to generate tensor elements
- */
- Object(const Shape &shape, const Generator &fn) : _shape{shape}
- {
- // Set 'stride'
- _stride.init(shape);
-
- // Pre-allocate buffer
- _values.resize(_shape.dim(0) * _stride.at(0));
-
- // Set 'value'
- iterate(_shape) <<
- [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); };
- }
-
-public:
- /**
- * @brief Get reference of shape
- * @return Reference of shape
- */
- const Shape &shape(void) const { return _shape; }
-
-public:
- /**
- * @brief Get and element of tensor
- * @param[in] index Index of a tensor element
- * @return Value of tensor element
- */
- T at(const Index &index) const override { return _values.at(_stride.offset(index)); }
-
-private:
- Shape _shape;
- NonIncreasingStride _stride;
-
-private:
- std::vector<T> _values;
-};
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_FEATURE_OBJECT_H__
diff --git a/libs/misc/include/misc/tensor/Reader.h b/libs/misc/include/misc/tensor/Reader.h
deleted file mode 100644
index 9175a913e..000000000
--- a/libs/misc/include/misc/tensor/Reader.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Reader.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Reader struct
- */
-
-#ifndef __NNFW_MISC_TENSOR_READER_H__
-#define __NNFW_MISC_TENSOR_READER_H__
-
-#include "misc/tensor/Index.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Struct to read element of tensor
- * @tparam T Type of elements in tensor
- */
-template <typename T> struct Reader
-{
- /**
- * @brief Destroy the Reader object
- */
- virtual ~Reader() = default;
-
- /**
- * @brief Get an element of tensor
- * @param[in] index Index specifying indexes of tensor element
- * @return The value of specificed element
- */
- virtual T at(const Index &index) const = 0;
-};
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_READER_H__
diff --git a/libs/misc/include/misc/tensor/Shape.h b/libs/misc/include/misc/tensor/Shape.h
deleted file mode 100644
index 6e6c23502..000000000
--- a/libs/misc/include/misc/tensor/Shape.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Shape.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Shape class
- */
-
-#ifndef __NNFW_MISC_TENSOR_SHAPE_H__
-#define __NNFW_MISC_TENSOR_SHAPE_H__
-
-#include <cstdint>
-#include <cstddef>
-#include <deque>
-#include <initializer_list>
-#include <ostream>
-#include <string>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to represent shape of a tensor
- */
-class Shape
-{
-public:
- /**
- * @brief Construct a new Shape object
- * @param[in] rank Rank of a tensor
- */
- Shape(size_t rank) { _dimensions.resize(rank); }
-
-public:
- /**
- * @brief Construct a new Shape object
- * @param[in] dimensions @c initializer_list<int32_t> of dimensions of tensor
- */
- Shape(const std::initializer_list<int32_t> &dimensions) : _dimensions{dimensions}
- {
- // DO NOTHING
- }
-
- /**
- * @brief Construct a new Shape object
- * @param[in] origin @c Shape object to copy
- */
- Shape(const Shape &origin) = default;
-
-public:
- /**
- * @brief Add dimension to the beginning
- * @param[in] d dimension to add to the beginning
- * @return N/A
- */
- void prepend(int32_t d) { _dimensions.emplace_front(d); }
-
- /**
- * @brief Add dimension to the back
- * @param[in] d dimension to add to the back
- * @return N/A
- */
- void append(int32_t d) { _dimensions.emplace_back(d); }
-
-public:
- /**
- * @brief Get the rank of this shape
- * @return rank
- */
- size_t rank(void) const { return _dimensions.size(); }
-
-public:
- /**
- * @brief Get specific dimension
- * @param[in] n Index of dimension
- * @return n'th dimension
- */
- int32_t dim(size_t n) const { return _dimensions.at(n); }
-
- /**
- * @brief Get the reference of specific dimension
- * @param[in] n Index of dimension
- * @return Reference of n'th dimension
- */
- int32_t &dim(size_t n) { return _dimensions.at(n); }
-
-public:
- /**
- * @brief Get the number of elements specified by this shape
- * @return The number of elements
- */
- size_t element_nums() const
- {
- size_t nums = 1;
- for (auto d : _dimensions)
- {
- nums *= d;
- }
- return nums;
- }
-
-private:
- std::deque<int32_t> _dimensions;
-
-public:
- /**
- * @brief Get a @c Shape object after parsing string
- * @param[in] s String of dimension list. Accepted format is numbers separated by comma.
- * @return @c Shape object
- */
- static Shape from(const std::string &s);
-};
-
-/**
- * @brief Check equality of two @c Shape
- * @param[in] Shape First shape to compare
- * @param[in] Shape Second shape to compare
- * @return @c true if both shapes are equal, otherwise @c false
- */
-bool operator==(const Shape &, const Shape &);
-
-/**
- * @brief Send @c Shape to @c std::ostream
- * @param[in] os @c std::ostream to process this @c Shape
- * @param[in] shape @c Shape to send to @c ostream
- * @return Reference of @c std::ostream
- */
-std::ostream &operator<<(std::ostream &os, const Shape &shape);
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_SHAPE_H__
diff --git a/libs/misc/include/misc/tensor/Zipper.h b/libs/misc/include/misc/tensor/Zipper.h
deleted file mode 100644
index 8f0ec4ab6..000000000
--- a/libs/misc/include/misc/tensor/Zipper.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Zipper.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains nnfw::misc::tensor::Zipper class
- */
-
-#ifndef __NNFW_MISC_TENSOR_ZIPPER_H__
-#define __NNFW_MISC_TENSOR_ZIPPER_H__
-
-#include "misc/tensor/Index.h"
-#include "misc/tensor/IndexIterator.h"
-#include "misc/tensor/Reader.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-/**
- * @brief Class to apply a function with three params: @c Index, elements of a tensor
- * at passed index read by @c Reader objects
- */
-template <typename T> class Zipper
-{
-public:
- /**
- * @brief Construct a new @c Zipper object
- * @param[in] shape Shape of @c lhs and @c rhs
- * @param[in] lhs @c Reader object of a tensor
- * @param[in] rhs @c Reader object of a tensor
- */
- Zipper(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs)
- : _shape{shape}, _lhs{lhs}, _rhs{rhs}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Apply @c cb to all elements of tensors. Elements of two tensors
- * at passed @c index are read by @c lhs and @c rhs
- * @param[in] cb Function to apply
- * @return N/A
- */
- template <typename Callable> void zip(Callable cb) const
- {
- iterate(_shape) <<
- [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); };
- }
-
-private:
- const Shape &_shape;
- const Reader<T> &_lhs;
- const Reader<T> &_rhs;
-};
-
-/**
- * @brief Apply @c cb by using @c lhs and @c rhs passed to the constructor of @c zipper
- * @param[in] zipper @c Zipper object
- * @param[in] cb Function to zpply using @c zip function
- * @return @c zipper object after applying @c cb to @c zipper
- */
-template <typename T, typename Callable>
-const Zipper<T> &operator<<(const Zipper<T> &zipper, Callable cb)
-{
- zipper.zip(cb);
- return zipper;
-}
-
-/**
- * @brief Get @c Zipper object constructed using passed params
- * @param shape Shape of @c lhs and @c rhs
- * @param lhs @c Reader object of a tensor
- * @param rhs @c Reader object of a tensor
- * @return @c Zipper object
- */
-template <typename T> Zipper<T> zip(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs)
-{
- return Zipper<T>{shape, lhs, rhs};
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_TENSOR_ZIPPER_H__
diff --git a/libs/misc/include/misc/vector.h b/libs/misc/include/misc/vector.h
deleted file mode 100644
index 395b08912..000000000
--- a/libs/misc/include/misc/vector.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file vector.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains @c == operator to check equality of elements in two vectors
- */
-#ifndef __NNFW_MISC_VECTOR_H__
-#define __NNFW_MISC_VECTOR_H__
-
-#include <vector>
-
-/**
- * @brief Compare elements of two vectors
- * @tparam T Type of elements in vectors
- * @param[in] lhs First vector to compare
- * @param[in] rhs Second vector to compare
- * @return @c true if all elements are equal, otherwise @c false.
- */
-template <typename T> bool operator==(const std::vector<T> &lhs, const std::vector<T> &rhs)
-{
- if (lhs.size() != rhs.size())
- {
- return false;
- }
-
- for (size_t ind = 0; ind < lhs.size(); ++ind)
- {
- if (lhs.at(ind) != rhs.at(ind))
- {
- return false;
- }
- }
-
- return true;
-}
-
-#endif // __NNFW_MISC_VECTOR_H__
diff --git a/libs/misc/include/misc/vector/Object.h b/libs/misc/include/misc/vector/Object.h
deleted file mode 100644
index 65d4bc613..000000000
--- a/libs/misc/include/misc/vector/Object.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Object.h
- * @brief This file contains Object class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_VECTOR_OBJECT_H__
-#define __NNFW_MISC_VECTOR_OBJECT_H__
-
-#include "misc/vector/Reader.h"
-
-#include <vector>
-#include <functional>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace vector
-{
-
-/**
- * @brief Class to have information of the operand for vector
- */
-template <typename T> class Object final : public Reader<T>
-{
-public:
- using Generator = std::function<T(int32_t size, int32_t offset)>;
-
-public:
- /**
- * @brief Construct Object object with size of vector and set value used by Generator
- * @param[in] size The size of vector
- * @param[in] gen A function to set values of operand tensor
- */
- Object(int32_t size, const Generator &gen) : _size{size}
- {
- _value.resize(_size);
-
- for (int32_t offset = 0; offset < size; ++offset)
- {
- _value.at(offset) = gen(size, offset);
- }
- }
-
-public:
- /**
- * @brief Get size of vector
- * @return Size of vector
- */
- int32_t size(void) const { return _size; }
-
-public:
- /**
- * @brief Get the value used by index
- * @param[in] nth The vector index
- * @return The value at the offset
- */
- T at(uint32_t nth) const override { return _value.at(nth); }
-
-private:
- /**
- * @brief Size of vector
- */
- const int32_t _size;
- /**
- * @brief The tensor vector of operand
- */
- std::vector<T> _value;
-};
-
-} // namespace vector
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_VECTOR_OBJECT_H__
diff --git a/libs/misc/include/misc/vector/Reader.h b/libs/misc/include/misc/vector/Reader.h
deleted file mode 100644
index eab4c427b..000000000
--- a/libs/misc/include/misc/vector/Reader.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Reader.h
- * @brief This file contains Reader class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_MISC_VECTOR_READER_H__
-#define __NNFW_MISC_VECTOR_READER_H__
-
-#include <cstdint>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace vector
-{
-
-/**
- * @brief Class reads values of vector
- * The interface class
- */
-template <typename T> struct Reader
-{
- /**
- * @brief Destruct Reader object using default destructor
- */
- virtual ~Reader() = default;
-
- /**
- * @brief Get the value used by the index
- * @param[in] nth The vector index
- * @return The value at the offset
- */
- virtual T at(uint32_t nth) const = 0;
-};
-
-} // namespace vector
-} // namespace misc
-} // namespace nnfw
-
-#endif // __NNFW_MISC_VECTOR_READER_H__
diff --git a/libs/misc/src/environment.cpp b/libs/misc/src/environment.cpp
deleted file mode 100644
index e39f18d62..000000000
--- a/libs/misc/src/environment.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <string.h>
-#include <cstdlib>
-#include <string>
-
-#include "misc/environment.h"
-
-namespace nnfw
-{
-namespace misc
-{
-
-int get_env_int(const char *name, int defaultValue)
-{
- const char *value = std::getenv(name);
- if (value != nullptr)
- return std::stoi(value);
- return defaultValue;
-}
-
-bool get_env_bool(const char *name, bool defaultValue)
-{
- const char *value = std::getenv(name);
- if (value != nullptr)
- {
- return std::stoi(value) != 0;
- }
-
- return defaultValue;
-}
-
-} // namespace misc
-} // namespace nnfw
-
-namespace nnfw
-{
-namespace misc
-{
-namespace env
-{
-
-IntAccessor::IntAccessor(const std::string &tag) : _tag{tag}
-{
- // DO NOTHING
-}
-
-bool IntAccessor::access(int &out) const
-{
- auto value = std::getenv(_tag.c_str());
-
- if (value == nullptr)
- {
- return false;
- }
-
- out = std::stoi(value);
- return true;
-}
-
-FloatAccessor::FloatAccessor(const std::string &tag) : _tag{tag}
-{
- // DO NOTHING
-}
-
-bool FloatAccessor::access(float &out) const
-{
- auto value = std::getenv(_tag.c_str());
-
- if (value == nullptr)
- {
- return false;
- }
-
- out = std::stof(value);
- return true;
-}
-
-} // namespace env
-} // namespace misc
-} // namespace nnfw
diff --git a/libs/misc/src/tensor/Comparator.cpp b/libs/misc/src/tensor/Comparator.cpp
deleted file mode 100644
index 013c9eed2..000000000
--- a/libs/misc/src/tensor/Comparator.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "misc/tensor/Comparator.h"
-#include "misc/tensor/Zipper.h"
-
-#include "misc/fp32.h"
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-std::vector<Diff<float>> Comparator::compare(const Shape &shape, const Reader<float> &expected,
- const Reader<float> &obtained,
- Observer *observer) const
-{
- std::vector<Diff<float>> res;
-
- zip(shape, expected, obtained) <<
- [&](const Index &index, float expected_value, float obtained_value) {
- const auto relative_diff = nnfw::misc::fp32::relative_diff(expected_value, obtained_value);
-
- if (!_compare_fn(expected_value, obtained_value))
- {
- res.emplace_back(index, expected_value, obtained_value);
- }
-
- // Update max_diff_index, if necessary
- if (observer != nullptr)
- {
- observer->notify(index, expected_value, obtained_value);
- }
- };
-
- return res;
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
diff --git a/libs/misc/src/tensor/IndexFormatter.cpp b/libs/misc/src/tensor/IndexFormatter.cpp
deleted file mode 100644
index c949db7a8..000000000
--- a/libs/misc/src/tensor/IndexFormatter.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/IndexFormatter.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-std::ostream &operator<<(std::ostream &os, const IndexFormatter &fmt)
-{
- const auto rank = fmt.index().rank();
-
- assert(rank > 0);
-
- os << fmt.index().at(0);
-
- if (rank > 1)
- {
- for (uint32_t axis = 1; axis < rank; ++axis)
- {
- os << ", " << fmt.index().at(axis);
- }
- }
-
- return os;
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
diff --git a/libs/misc/src/tensor/NonIncreasingStride.cpp b/libs/misc/src/tensor/NonIncreasingStride.cpp
deleted file mode 100644
index c51ad0324..000000000
--- a/libs/misc/src/tensor/NonIncreasingStride.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/NonIncreasingStride.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-uint32_t NonIncreasingStride::offset(const Index &index) const
-{
- const size_t rank = _stride.size();
-
- assert(index.rank() == rank);
-
- uint32_t offset = 0;
-
- for (size_t axis = 0; axis < rank; ++axis)
- {
- offset += _stride.at(axis) * index.at(axis);
- }
-
- return offset;
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
diff --git a/libs/misc/src/tensor/Shape.cpp b/libs/misc/src/tensor/Shape.cpp
deleted file mode 100644
index 675695e8e..000000000
--- a/libs/misc/src/tensor/Shape.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/Shape.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace misc
-{
-namespace tensor
-{
-
-bool operator==(const Shape &lhs, const Shape &rhs)
-{
- if (lhs.rank() != rhs.rank())
- {
- return false;
- }
-
- for (size_t axis = 0; axis < lhs.rank(); ++axis)
- {
- if (lhs.dim(axis) != rhs.dim(axis))
- {
- return false;
- }
- }
-
- return true;
-}
-
-Shape Shape::from(const std::string &str)
-{
- Shape shape(0);
-
- bool pending = false;
- int value = 0;
-
- for (const char *cur = str.c_str(); true; ++cur)
- {
- if (*cur == ',' || *cur == '\0')
- {
- if (pending)
- {
- shape.append(value);
- }
-
- if (*cur == '\0')
- {
- break;
- }
-
- pending = false;
- value = 0;
- continue;
- }
-
- assert(*cur >= '0' && *cur <= '9');
-
- pending = true;
- value *= 10;
- value += *cur - '0';
- }
-
- return shape;
-}
-
-std::ostream &operator<<(std::ostream &os, const Shape &shape)
-{
- if (shape.rank() > 0)
- {
- os << shape.dim(0);
-
- for (uint32_t axis = 1; axis < shape.rank(); ++axis)
- {
- os << "," << shape.dim(axis);
- }
- }
-
- return os;
-}
-
-} // namespace tensor
-} // namespace misc
-} // namespace nnfw
diff --git a/libs/profiling/CMakeLists.txt b/libs/profiling/CMakeLists.txt
deleted file mode 100644
index 7169508a1..000000000
--- a/libs/profiling/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-
-add_library(nnfw_lib_profiling STATIC ${SOURCES})
-set_property(TARGET nnfw_lib_profiling PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_include_directories(nnfw_lib_profiling PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
diff --git a/libs/profiling/include/profiling/profile_buffer.h b/libs/profiling/include/profiling/profile_buffer.h
deleted file mode 100644
index 83cd3eb2b..000000000
--- a/libs/profiling/include/profiling/profile_buffer.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/profiling/profile_buffer.h
-#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_
-#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_
-
-#include <cstddef>
-#include <cstdint>
-
-#include "profiling/time.h"
-
-namespace tflite {
-namespace profiling {
-
-// A profiling event.
-struct ProfileEvent {
- // Describes the type of event.
- // The event_metadata field may contain additional data for interpreting
- // the event.
- enum class EventType {
- // Default event type, the metadata field has no special significance.
- DEFAULT = 0,
- // The event is an operator invocation and the event_metadata field is the
- // index of operator node.
- OPERATOR_INVOKE_EVENT = 1
- };
-
- // Label of the event. This usually describes the event.
- const char* tag;
- // Timestamp in microseconds when the event began.
- uint64_t begin_timestamp_us;
- // Timestamp in microseconds when the event ended.
- uint64_t end_timestamp_us;
- // The field containing the type of event. This must be one of the event types
- // in EventType.
- EventType event_type;
- // Extra data describing the details of the event.
- uint32_t event_metadata;
-};
-} // namespace profiling
-} // namespace tflite
-
-#ifdef TFLITE_PROFILING_ENABLED
-
-#include <sys/time.h>
-#include <vector>
-
-namespace tflite {
-namespace profiling {
-constexpr uint32_t kInvalidEventHandle = static_cast<uint32_t>(~0) - 1;
-
-// A ring buffer of profile events.
-// This class is not thread safe.
-class ProfileBuffer {
- public:
- ProfileBuffer(uint32_t max_num_entries, bool enabled)
- : enabled_(enabled), current_index_(0), event_buffer_(max_num_entries) {}
-
- // Adds an event to the buffer with begin timestamp set to the current
- // timestamp. Returns a handle to event that can be used to call EndEvent. If
- // buffer is disabled this has no affect.
- // The tag of the event should remain valid till the buffer is valid.
- uint32_t BeginEvent(const char* tag, ProfileEvent::EventType event_type,
- uint32_t event_metadata) {
- if (!enabled_) {
- return kInvalidEventHandle;
- }
- uint64_t timestamp = time::NowMicros();
- int index = current_index_ % event_buffer_.size();
- event_buffer_[index].tag = tag;
- event_buffer_[index].event_type = event_type;
- event_buffer_[index].event_metadata = event_metadata;
- event_buffer_[index].begin_timestamp_us = timestamp;
- event_buffer_[index].end_timestamp_us = 0;
- current_index_++;
- return index;
- }
-
- // Sets the enabled state of buffer to |enabled|
- void SetEnabled(bool enabled) { enabled_ = enabled; }
-
- // Sets the end timestamp for event for the handle to current time.
- // If the buffer is disabled or previous event has been overwritten this
- // operation has not effect.
- void EndEvent(uint32_t event_handle) {
- if (!enabled_ || event_handle == kInvalidEventHandle ||
- event_handle > current_index_) {
- return;
- }
- const uint32_t max_size = event_buffer_.size();
- if (current_index_ > (max_size + event_handle)) {
- // Ignore, buffer has already overflowed.
- return;
- }
-
- int event_index = event_handle % max_size;
- event_buffer_[event_index].end_timestamp_us = time::NowMicros();
- }
-
- // Returns the size of the buffer.
- size_t Size() const {
- return (current_index_ >= event_buffer_.size()) ? event_buffer_.size()
- : current_index_;
- }
-
- // Resets the buffer.
- void Reset() {
- enabled_ = false;
- current_index_ = 0;
- }
-
- // Returns the profile event at the given index. If the index is invalid a
- // nullptr is returned. The return event may get overwritten if more events
- // are added to buffer.
- const struct ProfileEvent* const At(int index) const {
- size_t size = Size();
- if (index >= size) {
- return nullptr;
- }
- const uint32_t max_size = event_buffer_.size();
- uint32_t start =
- (current_index_ > max_size) ? current_index_ % max_size : max_size;
- index = (index + start) % max_size;
- return &event_buffer_[index];
- }
-
- private:
- bool enabled_;
- uint32_t current_index_;
- std::vector<ProfileEvent> event_buffer_;
-};
-} // namespace profiling
-} // namespace tflite
-#endif // TFLITE_PROFILING_ENABLED
-#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_
-
-// clang-format on
diff --git a/libs/profiling/include/profiling/profiler.h b/libs/profiling/include/profiling/profiler.h
deleted file mode 100644
index 953042da3..000000000
--- a/libs/profiling/include/profiling/profiler.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/profiling/profiler.h
-#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_
-#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_
-
-#include <vector>
-
-#include "profiling/profile_buffer.h"
-
-#ifdef TFLITE_PROFILING_ENABLED
-
-namespace tflite {
-namespace profiling {
-class ScopedProfile;
-class ScopedOperatorProfile;
-
-// Controls whether profiling is enabled or disabled and collects profiles.
-// TFLite is used on platforms that don't have posix threads, so the profiler is
-// kept as simple as possible. It is designed to be used only on a single
-// thread.
-//
-// Profiles are collected using Scoped*Profile objects that begin and end a
-// profile event.
-// An example usage is shown in the example below:
-//
-// Say Worker class has a DoWork method and we are interested in profiling
-// the overall execution time for DoWork and time spent in Task1 and Task2
-// functions.
-//
-// class Worker {
-// public:
-// void DoWork() {
-// ScopedProfile(&controller, "DoWork");
-// Task1();
-// Task2();
-// .....
-// }
-//
-// void Task1() {
-// ScopedProfile(&controller, "Task1");
-// ....
-// }
-//
-// void Task2() {
-// ScopedProfile(&controller, "Task2");
-// }
-//
-// Profiler profiler;
-// }
-//
-// We instrument the functions that need to be profiled.
-//
-// Profile can be collected by enable profiling and then getting profile
-// events.
-//
-// void ProfileWorker() {
-// Worker worker;
-// worker.profiler.EnableProfiling();
-// worker.DoWork();
-// worker.profiler.DisableProfiling();
-// // Profiling is complete, extract profiles.
-// auto profile_events = worker.profiler.GetProfiles();
-// }
-//
-//
-class Profiler {
- public:
- Profiler() : buffer_(1024, false) {}
-
- void StartProfiling() { buffer_.SetEnabled(true); }
- void StopProfiling() { buffer_.SetEnabled(false); }
- void Reset() { buffer_.Reset(); }
- std::vector<const ProfileEvent*> GetProfileEvents() {
- std::vector<const ProfileEvent*> profile_events;
- profile_events.reserve(buffer_.Size());
- for (size_t i = 0; i < buffer_.Size(); i++) {
- profile_events.push_back(buffer_.At(i));
- }
- return profile_events;
- }
-
- private:
- friend class ScopedProfile;
- friend class ScopedOperatorProfile;
- ProfileBuffer* GetProfileBuffer() { return &buffer_; }
- ProfileBuffer buffer_;
-};
-
-class ScopedProfile {
- public:
- // Adds a profile event to profile that begins with the construction
- // of object and ends when the object goes out of scope.
- // The lifetime of tag should be at least the lifetime of profiler.
-
- ScopedProfile(Profiler* profiler, const char* tag)
- : buffer_(nullptr), event_handle_(0) {
- if (profiler) {
- buffer_ = profiler->GetProfileBuffer();
- event_handle_ =
- buffer_->BeginEvent(tag, ProfileEvent::EventType::DEFAULT, 0);
- }
- }
- ~ScopedProfile() {
- if (buffer_) {
- buffer_->EndEvent(event_handle_);
- }
- }
-
- private:
- ProfileBuffer* buffer_;
- int32_t event_handle_;
-};
-
-class ScopedOperatorProfile {
- public:
- // Adds a profile event to profile that begins with the construction
- // of object and ends when the object goes out of scope.
- // The lifetime of tag should be at least the lifetime of profiler.
- ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index)
- : buffer_(nullptr), event_handle_(0) {
- if (profiler) {
- buffer_ = profiler->GetProfileBuffer();
- event_handle_ = buffer_->BeginEvent(
- tag, ProfileEvent::EventType::OPERATOR_INVOKE_EVENT, node_index);
- }
- }
-
- ~ScopedOperatorProfile() {
- if (buffer_) {
- buffer_->EndEvent(event_handle_);
- }
- }
-
- private:
- ProfileBuffer* buffer_;
- int32_t event_handle_;
-};
-
-} // namespace profiling
-} // namespace tflite
-
-#define VARNAME_UNIQ(name, ctr) name##ctr
-
-#define SCOPED_OPERATOR_PROFILE(profiler, node_index) \
- tflite::profiling::ScopedOperatorProfile VARNAME_UNIQ( \
- _profile_, __COUNTER__)((profiler), "OpInvoke", (node_index))
-#else
-
-namespace tflite {
-namespace profiling {
-// A noop version of profiler when profiling is disabled.
-class Profiler {
- public:
- Profiler() {}
- void StartProfiling() {}
- void StopProfiling() {}
- void Reset() {}
- std::vector<const ProfileEvent*> GetProfileEvents() { return {}; }
-};
-} // namespace profiling
-} // namespace tflite
-
-#define SCOPED_OPERATOR_PROFILE(profiler, node_index)
-
-#endif // TFLITE_PROFILING_ENABLED
-
-#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_
-
-// clang-format on
diff --git a/libs/profiling/include/profiling/profiling.h b/libs/profiling/include/profiling/profiling.h
deleted file mode 100644
index ee0df1338..000000000
--- a/libs/profiling/include/profiling/profiling.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_MISC_PROFILING_H__
-#define __NNFW_MISC_PROFILING_H__
-
-#include <iostream>
-
-namespace tflite
-{
-namespace profiling
-{
-class Profiler; // forward declaration
-}
-}
-
-namespace profiling
-{
-
-class Context
-{
-public:
- Context() : _sync(false), _profiler(nullptr) {}
-
-public:
- const bool &sync(void) const { return _sync; }
- tflite::profiling::Profiler *getProfiler() { return _profiler; }
- void setProfiler(tflite::profiling::Profiler *p) { _profiler = p; }
- void setSync(void) { _sync = true; }
-
-private:
- bool _sync;
- tflite::profiling::Profiler *_profiler;
-
-public:
- static Context &get(void)
- {
- static Context ctx{};
- return ctx;
- }
-};
-
-} // namespace profiling
-#endif // __NNFW_MISC_PROFILING_H__
diff --git a/libs/profiling/include/profiling/time.h b/libs/profiling/include/profiling/time.h
deleted file mode 100644
index 4b194944d..000000000
--- a/libs/profiling/include/profiling/time.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/profiling/time.h
-#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_
-#define TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_
-
-#include <cstdint>
-
-namespace tflite {
-namespace profiling {
-namespace time {
-uint64_t NowMicros();
-} // namespace time
-} // namespace profiling
-} // namespace tflite
-#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_
-
-// clang-format on
diff --git a/libs/profiling/src/profiling/time.cpp b/libs/profiling/src/profiling/time.cpp
deleted file mode 100644
index 92d8595f8..000000000
--- a/libs/profiling/src/profiling/time.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/profiling/time.cpp
-#include "profiling/time.h"
-
-#if defined(_MSC_VER)
-#include <chrono> // NOLINT(build/c++11)
-#else
-#include <sys/time.h>
-#endif
-
-namespace tflite {
-namespace profiling {
-namespace time {
-
-#if defined(_MSC_VER)
-
-uint64_t NowMicros() {
- return std::chrono::duration_cast<std::chrono::microseconds>(
- std::chrono::system_clock::now().time_since_epoch())
- .count();
-}
-
-#else
-
-uint64_t NowMicros() {
- struct timeval tv;
- gettimeofday(&tv, nullptr);
- return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
-}
-
-#endif // defined(_MSC_VER)
-
-} // namespace time
-} // namespace profiling
-} // namespace tflite
-
-// clang-format on
diff --git a/libs/tflite/CMakeLists.txt b/libs/tflite/CMakeLists.txt
deleted file mode 100644
index e844d1c68..000000000
--- a/libs/tflite/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
-file(GLOB_RECURSE TESTS "src/*.test.cpp")
-list(REMOVE_ITEM SOURCES ${TESTS})
-
-add_library(nnfw_lib_tflite STATIC ${SOURCES})
-set_target_properties(nnfw_lib_tflite PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(nnfw_lib_tflite PUBLIC ${NNFW_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_link_libraries(nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl)
-target_link_libraries(nnfw_lib_tflite nnfw_lib_misc)
-
-add_executable(nnfw_lib_tflite_test_TensorView src/TensorView.test.cpp)
-target_link_libraries(nnfw_lib_tflite_test_TensorView nnfw_lib_tflite)
diff --git a/libs/tflite/include/tflite/Assert.h b/libs/tflite/include/tflite/Assert.h
deleted file mode 100644
index 6d12d37f6..000000000
--- a/libs/tflite/include/tflite/Assert.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Assert.h
- * @brief This file contains helper function of assertion
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_ASSERT_H__
-#define __NNFW_TFLITE_ASSERT_H__
-
-#include "tensorflow/contrib/lite/context.h"
-
-#include <sstream>
-
-#define STR_DETAIL(value) #value
-#define STR(value) STR_DETAIL(value)
-
-#define TFLITE_ENSURE(exp) \
- { \
- const TfLiteStatus status = (exp); \
- \
- if (status != kTfLiteOk) \
- { \
- std::ostringstream ss; \
- ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \
- throw std::runtime_error{ss.str()}; \
- } \
- }
-
-#endif // __NNFW_TFLITE_ASSERT_H__
diff --git a/libs/tflite/include/tflite/Diff.h b/libs/tflite/include/tflite/Diff.h
deleted file mode 100644
index 15c672831..000000000
--- a/libs/tflite/include/tflite/Diff.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Diff.h
- * @brief This file contains classes for testing correctess of implementation
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_DIFF_H__
-#define __NNFW_TFLITE_DIFF_H__
-
-#include "tensorflow/contrib/lite/interpreter.h"
-
-#include "misc/tensor/Index.h"
-#include "misc/tensor/Diff.h"
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Comparator.h"
-
-#include "tflite/TensorView.h"
-
-#include <functional>
-#include <vector>
-
-/**
- * @brief Class to define TfLite interpreter match application
- */
-class TfLiteInterpMatchApp
-{
-public:
- /**
- * @brief Construct a new TfLiteInterpMatchApp object with Comparator
- * @param[in] comparator Comparator object for tensor comparation
- */
- TfLiteInterpMatchApp(const nnfw::misc::tensor::Comparator &comparator)
- : _verbose{false}, _comparator(comparator)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get reference verbose for debugging information
- * @return Reference of verbose value
- */
- int &verbose(void) { return _verbose; }
-
-private:
- int _verbose;
-
-public:
- /**
- * @brief Run two interpreter and return the output matching
- * @param[in] pure Interpreter object of expected(with TfLite)
- * @param[in] nnapi Interpreter object of obtained(through NNAPI)
- * @return @c true if two Interpreter results are same, otherwise @c false
- */
- bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const;
- /**
- * @brief Compare two TensorView values and return the match result
- * @param[in] expected TensorView object to read expected values
- * @param[in] obtained TensorView object to read obtained values
- * @param[in] id Tensor ID value used for debug message
- * @return @c true if two TensorView values are same, otherwise @c false
- */
- template <typename T>
- bool compareSingleTensorView(const nnfw::tflite::TensorView<T> &expected,
- const nnfw::tflite::TensorView<T> &obtained, int id) const;
-
-private:
- const nnfw::misc::tensor::Comparator &_comparator;
-};
-
-#include "tflite/interp/Builder.h"
-#include "tflite/Quantization.h"
-
-#include <random>
-
-/**
- * @brief Class to generate random values
- */
-class RandomGenerator
-{
-public:
- /**
- * @brief Construct a new RandomGenerator object
- * @param[in] seed Random seed value
- * @param[in] mean Mean value of normal random number generation
- * @param[in] stddev Standard deviation of random number generation
- * @param[in] quantization TfLiteQuantizationParams type to represent quantization value
- * (not used yet)
- */
- RandomGenerator(int seed, float mean, float stddev,
- const TfLiteQuantizationParams quantization = make_default_quantization())
- : _rand{seed}, _dist{mean, stddev}, _quantization{quantization}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Generate random numbers for type T
- * @param[in] s Shape value
- * @param[in] i Index value
- * @return Random generated value
- * @note This is same as T generate(void) as two input parameters are not used
- */
- template <typename T>
- T generate(const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)
- {
- return generate<T>();
- }
-
- /**
- * @brief Generate random numbers for type T
- * @return Random generated value
- */
- template <typename T> T generate(void) { return _dist(_rand); }
-
-private:
- std::minstd_rand _rand;
- std::normal_distribution<float> _dist;
- const TfLiteQuantizationParams _quantization;
-};
-
-template <> uint8_t RandomGenerator::generate<uint8_t>(void);
-
-/**
- * @brief Structure for NNAPI correctness test
- */
-struct RandomTestParam
-{
- int verbose; //!< Verbosity of debug information
- int tolerance; //!< Torlerance of value difference
- int tensor_logging = 0; //!< Save logging to a file if not 0
- std::string log_path = ""; //!< Path of log file, meaningful only when tensor_logging is 1
-};
-
-/**
- * @brief Class to define Random test runner
- */
-class RandomTestRunner
-{
-public:
- /**
- * @brief Construct a new RandomTestRunner object
- * @param[in] seed Random seed value
- * @param[in] param RandomTestParam object for test runner
- * @param[in] quantization TfLiteQuantizationParams type to represent quantization value
- */
- RandomTestRunner(int seed, const RandomTestParam &param,
- const TfLiteQuantizationParams quantization = make_default_quantization())
- : _randgen{seed, 0.0f, 2.0f, quantization}, _param{param}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Run the random test runner
- * @param[in] builder Interpreter Builder used to run
- * @return 0 if test succeeds, otherwise failure
- */
- int run(const nnfw::tflite::Builder &builder);
-
-public:
- /**
- * @brief Get RandomGenerator reference
- * @return RandomGenerator reference
- */
- RandomGenerator &generator() { return _randgen; };
-
-private:
- RandomGenerator _randgen;
- const RandomTestParam _param;
-
-public:
- /**
- * @brief Create a RandomTestRunner object
- * @param[in] seed Random seed value
- * @return RandomGenerator object
- */
- static RandomTestRunner make(int seed);
-};
-
-#endif // __NNFW_TFLITE_DIFF_H__
diff --git a/libs/tflite/include/tflite/FeatureView.h b/libs/tflite/include/tflite/FeatureView.h
deleted file mode 100644
index 06cbf4b14..000000000
--- a/libs/tflite/include/tflite/FeatureView.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FeatureView.h
- * @brief This file contains FeatureView class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_FEATURE_VIEW_H__
-#define __NNFW_TFLITE_FEATURE_VIEW_H__
-
-#include "tensorflow/contrib/lite/interpreter.h"
-
-#include "tflite/InputIndex.h"
-#include "tflite/OutputIndex.h"
-
-#include "misc/feature/Shape.h"
-#include "misc/feature/Reader.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-template <typename T> class FeatureView;
-
-/**
- * @brief Class to support reading element of float type feature
- */
-template <> class FeatureView<float> : public nnfw::misc::feature::Reader<float>
-{
-public:
- /**
- * @brief Construct a new FeatureView object
- * @param[in] interp Interpreter to read from
- * @param[in] index InputIndex index of input
- */
- FeatureView(::tflite::Interpreter &interp, const InputIndex &index);
- /**
- * @brief Construct a new FeatureView object
- * @param[in] interp Interpreter to read from
- * @param[in] index OutputIndex index of output
- */
- FeatureView(::tflite::Interpreter &interp, const OutputIndex &index);
-
-public:
- /**
- * @brief Get value of element using channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Value of element
- */
- float at(uint32_t ch, uint32_t row, uint32_t col) const;
- /**
- * @brief Get reference of element using channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Reference of element
- */
- float &at(uint32_t ch, uint32_t row, uint32_t col);
-
-private:
- /**
- * @brief Get offset of element from channel, row and column index
- * @param[in] ch Channel index
- * @param[in] row Row index
- * @param[in] col Column index
- * @return Offset of element
- */
- uint32_t getElementOffset(uint32_t ch, uint32_t row, uint32_t col) const
- {
- uint32_t res = 0;
-
- // TensorFlow Lite assumes that NHWC ordering for tessor
- res += row * _shape.W * _shape.C;
- res += col * _shape.C;
- res += ch;
-
- return res;
- }
-
-private:
- nnfw::misc::feature::Shape _shape;
- float *_base;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_FEATURE_VIEW_H__
diff --git a/libs/tflite/include/tflite/InputIndex.h b/libs/tflite/include/tflite/InputIndex.h
deleted file mode 100644
index f535b2626..000000000
--- a/libs/tflite/include/tflite/InputIndex.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file InputIndex.h
- * @brief This file contains InputIndex class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INPUT_INDEX_H__
-#define __NNFW_TFLITE_INPUT_INDEX_H__
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to express index of input
- */
-class InputIndex
-{
-public:
- /**
- * @brief Construct a new InputIndex object with index value
- * @param [in] index The value of index
- */
- InputIndex(int index) : _index(index)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get index value as int
- * @return Index value as int
- */
- int asInt(void) const { return _index; }
-
-private:
- int _index;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INPUT_INDEX_H__
diff --git a/libs/tflite/include/tflite/InterpreterSession.h b/libs/tflite/include/tflite/InterpreterSession.h
deleted file mode 100644
index deaf05a7f..000000000
--- a/libs/tflite/include/tflite/InterpreterSession.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file InterpreterSession.h
- * @brief This file contains InterpreterSession class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERPRETER_SESSION_H__
-#define __NNFW_TFLITE_INTERPRETER_SESSION_H__
-
-#include "Session.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define TfLite interpreter session which is inherited from Session class
- */
-class InterpreterSession final : public Session
-{
-public:
- /**
- * @brief Construct a InterpreterSession object with interpreter of TfLite
- * @param[in] interp The TfLite interpreter pointer
- */
- InterpreterSession(::tflite::Interpreter *interp) : _interp{interp}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get TfLite interpreter pointer
- * @return The TfLite interpreter
- */
- ::tflite::Interpreter *interp(void) override { return _interp; }
-
-public:
- /**
- * @brief Prepare the TfLite interpreter session
- * @return @c true if tensor preparation is successful, otherwise @c false
- */
- bool prepare(void) override
- {
- _interp->UseNNAPI(false);
-
- if (kTfLiteOk != _interp->AllocateTensors())
- {
- return false;
- }
-
- return true;
- }
-
- /**
- * @brief Run the Invoke function of TfLite interpreter
- * @return @c true if Invoke() is successful, otherwise @c false
- */
- bool run(void) override
- {
- // Return true if Invoke returns kTfLiteOk
- return kTfLiteOk == _interp->Invoke();
- }
-
- /**
- * @brief Tear down TfLite interpreter session
- * @return @c true always
- */
- bool teardown(void) override
- {
- // Do NOTHING currently
- return true;
- }
-
-private:
- ::tflite::Interpreter *const _interp;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERPRETER_SESSION_H__
diff --git a/libs/tflite/include/tflite/NNAPISession.h b/libs/tflite/include/tflite/NNAPISession.h
deleted file mode 100644
index b2a999d10..000000000
--- a/libs/tflite/include/tflite/NNAPISession.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file NNAPISession.h
- * @brief This file contains NNAPISession class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_NNAPI_SESSION_H__
-#define __NNFW_TFLITE_NNAPI_SESSION_H__
-
-#include "Session.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define NNAPI interpreter session which is inherited from Session class
- */
-class NNAPISession final : public Session
-{
-public:
- /**
- * @brief Construct a NNAPISession object with interpreter of TfLite
- * @param[in] interp The TfLite interpreter pointer
- * @note Invoke BuildGraph() of NNAPI delegate from Interpreter
- */
- NNAPISession(::tflite::Interpreter *interp) : _interp{interp}
- {
- // Construct Graph from Interpreter
- _delegate.BuildGraph(_interp);
- }
-
-public:
- /**
- * @brief Get TfLite interpreter pointer
- * @return The TfLite interpreter
- */
- ::tflite::Interpreter *interp(void) override { return _interp; }
-
-public:
- /**
- * @brief Prepare the TfLite interpreter session
- * @return @c true if tensor preparation is successful, otherwise @c false
- */
- bool prepare(void) override
- {
- // Explicitly turn off T/F lite internal NNAPI delegation in order to use locally defined
- // NNAPI delegation.
- _interp->UseNNAPI(false);
-
- if (kTfLiteOk != _interp->AllocateTensors())
- {
- return false;
- }
-
- return true;
- }
-
- /**
- * @brief Run the Invoke function of NNAPI delegate
- * @return @c true if Invoke() is successful, otherwise @c false
- */
- bool run(void) override { return kTfLiteOk == _delegate.Invoke(_interp); }
-
- /**
- * @brief Tear down TfLite interpreter session
- * @return @c true always
- */
- bool teardown(void) override
- {
- // DO NOTHING
- return true;
- }
-
-private:
- ::tflite::Interpreter *const _interp;
- nnfw::tflite::NNAPIDelegate _delegate;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_NNAPI_SESSION_H__
diff --git a/libs/tflite/include/tflite/OutputIndex.h b/libs/tflite/include/tflite/OutputIndex.h
deleted file mode 100644
index dd1ca8d44..000000000
--- a/libs/tflite/include/tflite/OutputIndex.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file OutputIndex.h
- * @brief This file contains OutputIndex class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_OUTPUT_INDEX_H__
-#define __NNFW_TFLITE_OUTPUT_INDEX_H__
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define OutputIndex
- */
-class OutputIndex
-{
-public:
- /**
- * @brief Construct a OutputIndex object with index value
- * @param[in] index The value of index
- */
- OutputIndex(int index) : _index(index)
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Get index value as int
- * @return Index valuel as int
- */
- int asInt(void) const { return _index; }
-
-private:
- int _index;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_OUTPUT_INDEX_H__
diff --git a/libs/tflite/include/tflite/Quantization.h b/libs/tflite/include/tflite/Quantization.h
deleted file mode 100644
index 4a8a0f1ac..000000000
--- a/libs/tflite/include/tflite/Quantization.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Quantization.h
- * @brief This file contains BitwiseIntToFloat union and quantization related
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_QUANTIZATION_H__
-#define __NNFW_TFLITE_QUANTIZATION_H__
-
-/**
- * @brief Union to provide bitwise conversion of integer and float
- */
-union BitwiseIntToFloat {
- int i;
- float f;
-};
-
-static const float FLOAT_NEAREST_TO_1 = BitwiseIntToFloat{0x3f7fffff}.f;
-
-#include "tensorflow/contrib/lite/context.h"
-
-/**
- * @brief Get TfLiteQuantizationParams object with default values
- * @return TfLiteQuantizationParams object
- */
-TfLiteQuantizationParams make_default_quantization(void);
-
-#endif // __NNFW_TFLITE_QUANTIZATION_H__
diff --git a/libs/tflite/include/tflite/Session.h b/libs/tflite/include/tflite/Session.h
deleted file mode 100644
index 4f2e5c54d..000000000
--- a/libs/tflite/include/tflite/Session.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Session.h
- * @brief This file contains Session class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_SESSION_H__
-#define __NNFW_TFLITE_SESSION_H__
-
-#include <tensorflow/contrib/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure to provide interface methods of interpreter session
- */
-struct Session
-{
- /**
- * @brief Destruct Session object using default destructor
- */
- virtual ~Session() = default;
-
- /**
- * @brief Get the Interpreter object pointer
- * @return The Interpreter object pointer
- */
- virtual ::tflite::Interpreter *interp(void) = 0;
-
- /**
- * @brief Prepare the session
- * @return @c true if prepare method succeeded, otherwise @c false
- */
- virtual bool prepare(void) = 0;
- /**
- * @brief Run the session
- * @return @c true if run method succeeded, otherwise @c false
- */
- virtual bool run(void) = 0;
- /**
- * @brief Teardown(release) the session
- * @return @c true if teardown method succeeded, otherwise @c false
- */
- virtual bool teardown(void) = 0;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_SESSION_H__
diff --git a/libs/tflite/include/tflite/TensorLogger.h b/libs/tflite/include/tflite/TensorLogger.h
deleted file mode 100644
index e56a76b58..000000000
--- a/libs/tflite/include/tflite/TensorLogger.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorLogger.h
- * @brief This file contains TensorLogger class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_LOGGER_H__
-#define __NNFW_TFLITE_TENSOR_LOGGER_H__
-
-#include "misc/tensor/IndexIterator.h"
-#include "tflite/TensorView.h"
-
-#include <tensorflow/contrib/lite/interpreter.h>
-#include <tensorflow/contrib/lite/context.h>
-#include <fstream>
-#include <iomanip>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to write input and output value / shape into a file in python form
- * @note This is a utility to write input and output value / shape into a file in python form.\n
- * any python app can load this value by running the python code below:\n
- * exec(open(filename).read())\n
- * generated python code looks like the following: \n
- * tensor_shape_gen = []\n
- * tensor_value_gen = []\n\n
- * tensor_shape_gen.append("{2, 1, 2}")\n
- * tensor_value_gen.append([1, 2, 3, 4])\n\n
- * tensor_shape_gen.append("{2}")\n
- * tensor_value_gen.append([1, 2])\n\n
- * tensor_shape_gen.append("{2, 1, 2}")\n
- * tensor_value_gen.append([1, 4, 3, 8])\n
- */
-class TensorLogger
-{
-private:
- std::ofstream _outfile;
-
-public:
- /**
- * @brief Get TensorLogger instance
- * @return The TensorLogger instance
- */
- static TensorLogger &instance()
- {
- static TensorLogger instance;
- return instance;
- }
-
- /**
- * @brief Save the tensor details to file from interpreter
- * @param[in] path The file path to save
- * @param[in] interp The TfLite interpreter
- */
- void save(const std::string &path, ::tflite::Interpreter &interp)
- {
- open(path);
-
- int log_index = 0;
- for (const auto id : interp.inputs())
- {
- _outfile << "# input tensors" << std::endl;
- printTensor(interp, id, log_index++);
- }
- for (const auto id : interp.outputs())
- {
- _outfile << "# output tensors" << std::endl;
- printTensor(interp, id, log_index++);
- }
- close();
- }
-
-private:
- void open(const std::string &path)
- {
- if (!_outfile.is_open())
- _outfile.open(path, std::ios_base::out);
-
- _outfile << "# ------ file: " << path << " ------" << std::endl
- << "tensor_shape_gen = []" << std::endl
- << "tensor_value_gen = []" << std::endl
- << std::endl;
- }
-
- void printTensor(::tflite::Interpreter &interp, const int id, const int log_index)
- {
- const TfLiteTensor *tensor = interp.tensor(id);
-
- _outfile << "# tensor name: " << tensor->name << std::endl;
- _outfile << "# tflite::interpreter.tensor(" << id << ") -> "
- "tensor_value_gen["
- << log_index << "]" << std::endl;
-
- if (tensor->type == kTfLiteInt32)
- {
- printTensorShape(tensor);
- printTensorValue<int32_t>(tensor, tensor->data.i32);
- }
- else if (interp.tensor(id)->type == kTfLiteUInt8)
- {
- printTensorShape(tensor);
- printTensorValue<uint8_t>(tensor, tensor->data.uint8);
- }
- else if (tensor->type == kTfLiteFloat32)
- {
- printTensorShape(tensor);
- printTensorValue<float>(tensor, tensor->data.f);
- }
- }
-
- void printTensorShape(const TfLiteTensor *tensor)
- {
- _outfile << "tensor_shape_gen.append('{";
-
- size_t r = 0;
- for (; r < tensor->dims->size - 1; r++)
- {
- _outfile << tensor->dims->data[r] << ", ";
- }
- _outfile << tensor->dims->data[r];
-
- _outfile << "}')" << std::endl;
- }
-
- template <typename T> void printTensorValue(const TfLiteTensor *tensor, T *tensor_data_ptr)
- {
- _outfile << "tensor_value_gen.append([";
-
- _outfile << std::fixed << std::setprecision(10);
-
- const T *end = reinterpret_cast<const T *>(tensor->data.raw_const + tensor->bytes);
- for (T *ptr = tensor_data_ptr; ptr < end; ptr++)
- _outfile << *ptr << ", ";
-
- _outfile << "])" << std::endl << std::endl;
- }
-
- void close()
- {
- _outfile << "# --------- tensor shape and value defined above ---------" << std::endl;
- _outfile.close();
- }
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_LOGGER_H__
diff --git a/libs/tflite/include/tflite/TensorShapeUtils.h b/libs/tflite/include/tflite/TensorShapeUtils.h
deleted file mode 100644
index ba8687413..000000000
--- a/libs/tflite/include/tflite/TensorShapeUtils.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorShapeUtils.h
- * @brief This file contains utilities function of tensor shape
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
-
-#include "misc/tensor/Shape.h"
-
-#include <vector>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Converts tensor::Shape into a vector
- * @param[in] shape The tensor shape to be converted
- * @return vector value of given shape object
- */
-static inline std::vector<int32_t> as_dims(const nnfw::misc::tensor::Shape &shape)
-{
- std::vector<int32_t> dims;
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- dims.emplace_back(shape.dim(axis));
- }
-
- return dims;
-}
-
-/**
- * @brief Broadcasts between two given shapes
- * @param[in] lhs_shape The left hand side shape
- * @param[in] rhs_shape The right hand side shape
- * @return The broadcasted shape
- */
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
- const nnfw::misc::tensor::Shape &rhs_shape);
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_SHAPE_UTILS_H__
diff --git a/libs/tflite/include/tflite/TensorUtils.h b/libs/tflite/include/tflite/TensorUtils.h
deleted file mode 100644
index 6266c5dff..000000000
--- a/libs/tflite/include/tflite/TensorUtils.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorUtils.h
- * @brief This file contains utilities function
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_UTILS_H__
-#define __NNFW_TFLITE_TENSOR_UTILS_H__
-
-#include <tensorflow/contrib/lite/context.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Get @c true if tensor type is kTfLiteFloat32, otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor type is kTfLiteFloat32, otherwise @c false
- */
-inline bool isFloatTensor(const TfLiteTensor *tensor) { return tensor->type == kTfLiteFloat32; }
-
-/**
- * @brief Get @c true if tensor is 4-D tensor and the first dimension length is 1,
- * otherwise @c false
- * @param[in] tensor The tensor object to be compared
- * @return @c true if tensor is 4-D tensor and the first dimension length is 1, otherwise @c false
- */
-inline bool isFeatureTensor(const TfLiteTensor *tensor)
-{
- return (tensor->dims->size == 4) && (tensor->dims->data[0] == 1);
-}
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_UTILS_H__
diff --git a/libs/tflite/include/tflite/TensorView.h b/libs/tflite/include/tflite/TensorView.h
deleted file mode 100644
index 79c754c78..000000000
--- a/libs/tflite/include/tflite/TensorView.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorView.h
- * @brief This file contains TensorView class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_TENSOR_VIEW_H__
-#define __NNFW_TFLITE_TENSOR_VIEW_H__
-
-#include "tensorflow/contrib/lite/interpreter.h"
-
-#include "misc/tensor/Shape.h"
-#include "misc/tensor/Index.h"
-#include "misc/tensor/Reader.h"
-#include "misc/tensor/NonIncreasingStride.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define TensorView which is inherited from nnfw::misc::tensor::Reader<T> class
- */
-template <typename T> class TensorView final : public nnfw::misc::tensor::Reader<T>
-{
-public:
- /**
- * @brief Construct a TensorView object with base and shape informations
- * @param[in] shape The shape of a tensor
- * @param[in] base The base address of a tensor
- */
- TensorView(const nnfw::misc::tensor::Shape &shape, T *base) : _shape{shape}, _base{base}
- {
- // Set 'stride'
- _stride.init(_shape);
- }
-
-public:
- /**
- * @brief Get shape of tensor
- * @return Reference of shape
- */
- const nnfw::misc::tensor::Shape &shape(void) const { return _shape; }
-
-public:
- /**
- * @brief Get value of tensor index
- * @param[in] index The tensor index
- * @return The value at the index
- */
- T at(const nnfw::misc::tensor::Index &index) const override
- {
- const auto offset = _stride.offset(index);
- return *(_base + offset);
- }
-
-public:
- /**
- * @brief Get reference value of tensor index
- * @param[in] index The tensor index
- * @return The reference value at the index
- */
- T &at(const nnfw::misc::tensor::Index &index)
- {
- const auto offset = _stride.offset(index);
- return *(_base + offset);
- }
-
-private:
- nnfw::misc::tensor::Shape _shape; /**< The tensor shape */
-
-public:
- T *_base; /**< The base address of tensor */
- nnfw::misc::tensor::NonIncreasingStride _stride; /**< The NonIncreasingStride object */
-
-public:
- // TODO Introduce Operand ID class
- /**
- * @brief Create TensorView object using given parameters
- * @param[in] interp The TfLite interpreter
- * @param[in] tensor_index The tensor index
- * @return The new TensorView<T> object
- */
- static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
- {
- auto tensor_ptr = interp.tensor(tensor_index);
-
- // Set 'shape'
- nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size);
-
- for (uint32_t axis = 0; axis < shape.rank(); ++axis)
- {
- shape.dim(axis) = tensor_ptr->dims->data[axis];
- }
-
- return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
- }
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_TENSOR_VIEW_H__
diff --git a/libs/tflite/include/tflite/ext/kernels/Abs.h b/libs/tflite/include/tflite/ext/kernels/Abs.h
deleted file mode 100644
index 74e4aa658..000000000
--- a/libs/tflite/include/tflite/ext/kernels/Abs.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_ABS_H__
-#define __NNFW_TFLITE_EXT_KERNELS_ABS_H__
-
-#include "tensorflow/contrib/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace Abs
-{
-
-void *InitAbs(TfLiteContext *context, const char *buffer, size_t length);
-void FreeAbs(TfLiteContext *context, void *buffer);
-TfLiteStatus PrepareAbs(TfLiteContext *context, TfLiteNode *node);
-TfLiteStatus EvalAbs(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace Abs
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_ABS_H__
diff --git a/libs/tflite/include/tflite/ext/kernels/CustomOps.h b/libs/tflite/include/tflite/ext/kernels/CustomOps.h
deleted file mode 100644
index 3f9459bb2..000000000
--- a/libs/tflite/include/tflite/ext/kernels/CustomOps.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CustomOps.h
- * @brief This file contains registration of custom operands
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
-#define __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
-
-#include "tensorflow/contrib/lite/context.h"
-#include "tflite/ext/kernels/TensorFlowMax.h"
-#include "tflite/ext/kernels/SquaredDifference.h"
-#include "tflite/ext/kernels/TensorFlowSum.h"
-#include "tflite/ext/kernels/Abs.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-
-#define REGISTER_FUNCTION(Name) \
- TfLiteRegistration *Register_##Name(void) \
- { \
- static TfLiteRegistration r = { \
- Name::Init##Name, Name::Free##Name, Name::Prepare##Name, Name::Eval##Name, \
- }; \
- r.custom_name = #Name; \
- return &r; \
- }
-
-REGISTER_FUNCTION(TensorFlowMax)
-REGISTER_FUNCTION(SquaredDifference)
-REGISTER_FUNCTION(TensorFlowSum)
-REGISTER_FUNCTION(Abs)
-
-#undef REGISTER_FUNCTION
-
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_CUSTOM_OP_H__
diff --git a/libs/tflite/include/tflite/ext/kernels/SquaredDifference.h b/libs/tflite/include/tflite/ext/kernels/SquaredDifference.h
deleted file mode 100644
index 492523c02..000000000
--- a/libs/tflite/include/tflite/ext/kernels/SquaredDifference.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file SquaredDifference.h
- * @brief This file contains SquaredDifference namespace and SquaredDifference function
- * definitions
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
-#define __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
-
-#include "tensorflow/contrib/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace SquaredDifference
-{
-
-/**
- * @brief Initialize SquaredDifference operand using the contents of buffer
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @param[in] length The buffer length
- * @return The void pointer for user data
- */
-void *InitSquaredDifference(TfLiteContext *context, const char *buffer, size_t length);
-
-/**
- * @brief Release any memory it might have allocated via 'InitSquaredDifference'
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @return N/A
- */
-void FreeSquaredDifference(TfLiteContext *context, void *buffer);
-
-/**
- * @brief Prepare the SquaredDifference operand for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node);
-
-/**
- * @brief Evaluation the SquaredDifference operand for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace SquaredDifference
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_SQUARED_DIFFERENCE_H__
diff --git a/libs/tflite/include/tflite/ext/kernels/TensorFlowMax.h b/libs/tflite/include/tflite/ext/kernels/TensorFlowMax.h
deleted file mode 100644
index d31d76483..000000000
--- a/libs/tflite/include/tflite/ext/kernels/TensorFlowMax.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file TensorFlowMax.h
- * @brief This file contains TensorFlowMax namespace and TensorFlowMax function definitions
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
-#define __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
-
-#include "tensorflow/contrib/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowMax
-{
-
-/**
- * @brief Initialize TensorFlowMax operand using the contents of buffer
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @param[in] length The buffer length
- * @return The void pointer for user data
- */
-void *InitTensorFlowMax(TfLiteContext *context, const char *buffer, size_t length);
-
-/**
- * @brief Release any memory it might have allocated via 'InitTensorFlowMax'
- * @param[in] context The TfLite context
- * @param[in] buffer The buffer with contents
- * @return N/A
- */
-void FreeTensorFlowMax(TfLiteContext *context, void *buffer);
-
-/**
- * @brief Prepare the TensorFlowMax operand for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node);
-
-/**
- * @brief Evaluation the TensorFlowMax operand for execution
- * @param[in] context The TfLite context
- * @param[in] node The operand node
- * @return The TfLite status
- */
-TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace TensorFlowMax
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_MAX_H__
diff --git a/libs/tflite/include/tflite/ext/kernels/TensorFlowSum.h b/libs/tflite/include/tflite/ext/kernels/TensorFlowSum.h
deleted file mode 100644
index 66783cf41..000000000
--- a/libs/tflite/include/tflite/ext/kernels/TensorFlowSum.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
-#define __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
-
-#include "tensorflow/contrib/lite/context.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowSum
-{
-
-void *InitTensorFlowSum(TfLiteContext *context, const char *buffer, size_t length);
-void FreeTensorFlowSum(TfLiteContext *context, void *buffer);
-TfLiteStatus PrepareTensorFlowSum(TfLiteContext *context, TfLiteNode *node);
-TfLiteStatus EvalTensorFlowSum(TfLiteContext *context, TfLiteNode *node);
-
-} // namespace TensorFlowSum
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_TENSORFLOW_SUM_H__
diff --git a/libs/tflite/include/tflite/ext/kernels/register.h b/libs/tflite/include/tflite/ext/kernels/register.h
deleted file mode 100644
index 124af7abc..000000000
--- a/libs/tflite/include/tflite/ext/kernels/register.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow)
-// 'externals/tensorflow/tensorflow/contrib/lite/kernels/register.h'
-#ifndef __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
-#define __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
-
-#include <unordered_map>
-#include "tensorflow/contrib/lite/context.h"
-#include "tensorflow/contrib/lite/model.h"
-
-namespace nnfw {
-namespace tflite {
-
-class BuiltinOpResolver : public ::tflite::MutableOpResolver {
- public:
- BuiltinOpResolver();
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_KERNELS_REGISTER_H__
-
-// clang-format on
diff --git a/libs/tflite/include/tflite/ext/nnapi_delegate.h b/libs/tflite/include/tflite/ext/nnapi_delegate.h
deleted file mode 100644
index 3aac01af7..000000000
--- a/libs/tflite/include/tflite/ext/nnapi_delegate.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This header is derived from the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.h'
-#ifndef __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
-#define __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
-
-#include "tensorflow/contrib/lite/allocation.h"
-#ifdef OBS_BUILD
-#include "tensorflow/contrib/lite/context.h"
-#include "tensorflow/contrib/lite/error_reporter.h"
-#else
-#include "tensorflow/contrib/lite/c/c_api_internal.h"
-#include "tensorflow/contrib/lite/core/api/error_reporter.h"
-#endif
-#include "tensorflow/contrib/lite/interpreter.h"
-#include "NeuralNetworksShim.h"
-
-class ANeuralNetworksModel;
-class ANeuralNetworksMemory;
-class ANeuralNetworksCompilation;
-
-namespace nnfw {
-namespace tflite {
-
-class NNAPIAllocation : public ::tflite::MMAPAllocation {
- public:
- NNAPIAllocation(const char* filename, ::tflite::ErrorReporter* error_reporter);
- ~NNAPIAllocation();
-
- size_t offset(const void* ptr) const {
- auto signed_offset = reinterpret_cast<const uint8_t*>(ptr) -
- reinterpret_cast<const uint8_t*>(mmapped_buffer_);
-
- return static_cast<size_t>(signed_offset);
- }
-
- ANeuralNetworksMemory* memory() const { return handle_; }
- bool valid() const override { return handle_ != nullptr; }
-
- private:
- mutable ANeuralNetworksMemory* handle_ = nullptr;
-};
-
-class NNAPIDelegate {
- public:
- ~NNAPIDelegate();
-
- // Convert a tflite graph to NNAPI
- TfLiteStatus BuildGraph(::tflite::Interpreter* interpreter);
-
- // Run
- TfLiteStatus Invoke(::tflite::Interpreter* interpreter);
-
- // Whether the current platform supports NNAPI delegation.
- static bool IsSupported();
-
- private:
- // The NN API model handle
- ANeuralNetworksModel* nn_model_ = nullptr;
- // The NN API compilation handle
- ANeuralNetworksCompilation* nn_compiled_model_ = nullptr;
- // Model status
- TfLiteStatus model_status_ = kTfLiteOk;
-
- // List of state tensors for LSTM, RNN, SVDF.
- // NN API does not allow ops to maintain states across multiple
- // invocations. We need to manually create state input tensors from
- // corresponding state output tensors of TFLite operations, and map them
- // correctly.
- std::vector<int> model_states_inputs_; // holds NNAPI operand ids
- std::vector<int> model_states_outputs_; // holds TFLite tensor ids
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_EXT_NNAPI_DELEGATE_H__
-
-// clang-format on
diff --git a/libs/tflite/include/tflite/interp/Builder.h b/libs/tflite/include/tflite/interp/Builder.h
deleted file mode 100644
index b4d082419..000000000
--- a/libs/tflite/include/tflite/interp/Builder.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file Builder.h
- * @brief This file contains Builder structure
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_BUILDER_H__
-
-#include <tensorflow/contrib/lite/interpreter.h>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Structure to Builder
- */
-struct Builder
-{
- /**
- * @brief Destroy the Builder object
- */
- virtual ~Builder() = default;
-
- /**
- * @brief Build a FlatBuffer model
- * @return The TfLite interpreter object
- */
- virtual std::unique_ptr<::tflite::Interpreter> build(void) const = 0;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_BUILDER_H__
diff --git a/libs/tflite/include/tflite/interp/FlatBufferBuilder.h b/libs/tflite/include/tflite/interp/FlatBufferBuilder.h
deleted file mode 100644
index 13470b8c5..000000000
--- a/libs/tflite/include/tflite/interp/FlatBufferBuilder.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FlatBufferBuilder.h
- * @brief This file contains FlatBufferBuilder class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
-
-#include <tensorflow/contrib/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FlatBufferBuilder which is inherited from Builder
- */
-class FlatBufferBuilder final : public Builder
-{
-public:
- /**
- * @brief Construct a FlatBufferBuilder object with FlatBufferModel of TfLite
- * @param[in] model The TfLite Flatbuffer model
- */
- FlatBufferBuilder(const ::tflite::FlatBufferModel &model) : _model{model}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Build a FlatBuffer model
- * @return The TfLite interpreter pointer address
- */
- std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
- const ::tflite::FlatBufferModel &_model;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FLAT_BUFFER_BUILDER_H__
diff --git a/libs/tflite/include/tflite/interp/FunctionBuilder.h b/libs/tflite/include/tflite/interp/FunctionBuilder.h
deleted file mode 100644
index 064375939..000000000
--- a/libs/tflite/include/tflite/interp/FunctionBuilder.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file FunctionBuilder.h
- * @brief This file contains FunctionBuilder class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-#define __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
-
-#include <tensorflow/contrib/lite/model.h>
-
-#include "tflite/interp/Builder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-/**
- * @brief Class to define FunctionBuilder which is inherited from Builder
- */
-class FunctionBuilder final : public Builder
-{
-public:
- using SetupFunc = std::function<void(::tflite::Interpreter &)>;
-
-public:
- /**
- * @brief Construct a FunctionBuilder object with SetupFunction
- * @param[in] fn The SetupFunc object
- */
- FunctionBuilder(const SetupFunc &fn) : _fn{fn}
- {
- // DO NOTHING
- }
-
-public:
- /**
- * @brief Build a SetupFunc
- * @return The TfLite interpreter pointer address
- */
- std::unique_ptr<::tflite::Interpreter> build(void) const override;
-
-private:
- SetupFunc _fn;
-};
-
-} // namespace tflite
-} // namespace nnfw
-
-#endif // __NNFW_TFLITE_INTERP_FUNCTION_BUILDER_H__
diff --git a/libs/tflite/src/Diff.cpp b/libs/tflite/src/Diff.cpp
deleted file mode 100644
index 45ef06110..000000000
--- a/libs/tflite/src/Diff.cpp
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/Diff.h"
-#include "tflite/ext/nnapi_delegate.h"
-
-#include "misc/fp32.h"
-
-#include "misc/tensor/IndexIterator.h"
-#include "misc/tensor/IndexFormatter.h"
-#include "misc/tensor/Zipper.h"
-#include "misc/tensor/Comparator.h"
-
-#include "misc/environment.h"
-
-#include <iostream>
-#include <cassert>
-
-// Comparator observer that remembers the largest absolute and the largest relative
-// difference it has been notified about, together with where each one occurred.
-class DiffSummary : public nnfw::misc::tensor::Comparator::Observer
-{
-public:
-  // All records start at zero, so only strictly positive differences are ever recorded.
-  DiffSummary()
-      : max_abs_diff_index(0),
-        max_abs_diff_expected{0.0f},
-        max_abs_diff_obtained{0.0f},
-        max_abs_diff_value{0.0f},
-        max_rel_diff_index(0),
-        max_rel_diff_expected{0.0f},
-        max_rel_diff_obtained{0.0f},
-        max_rel_diff_value{0.0f}
-  {
-  }
-
-  // Observer hook: receives one (index, expected, obtained) triple.
-  void notify(const nnfw::misc::tensor::Index &index, float expected, float obtained) override;
-
-  // Record of the largest absolute difference seen so far
-  nnfw::misc::tensor::Index max_abs_diff_index;
-  float max_abs_diff_expected;
-  float max_abs_diff_obtained;
-  float max_abs_diff_value;
-
-  // Record of the largest relative difference seen so far
-  nnfw::misc::tensor::Index max_rel_diff_index;
-  float max_rel_diff_expected;
-  float max_rel_diff_obtained;
-  float max_rel_diff_value;
-};
-
-// Updates the running maxima with one (expected, obtained) pair located at `index`.
-// A record is replaced only when the new difference is strictly larger than the stored one.
-void DiffSummary::notify(const nnfw::misc::tensor::Index &index, float expected, float obtained)
-{
-  // Absolute difference
-  const auto abs_diff = std::fabs(expected - obtained);
-  const bool new_abs_max = (abs_diff > max_abs_diff_value);
-
-  if (new_abs_max)
-  {
-    max_abs_diff_index = index;
-    max_abs_diff_value = abs_diff;
-    max_abs_diff_expected = expected;
-    max_abs_diff_obtained = obtained;
-  }
-
-  // Relative difference, as defined by nnfw::misc::fp32::relative_diff
-  const auto rel_diff = nnfw::misc::fp32::relative_diff(expected, obtained);
-  const bool new_rel_max = (rel_diff > max_rel_diff_value);
-
-  if (new_rel_max)
-  {
-    max_rel_diff_index = index;
-    max_rel_diff_value = rel_diff;
-    max_rel_diff_expected = expected;
-    max_rel_diff_obtained = obtained;
-  }
-}
-
-/**
- * @brief Compare two tensor views element by element with operator!= and print a report
- * @param[in] expected Reference tensor view
- * @param[in] obtained Tensor view under test (asserted to have the same shape)
- * @param[in] id       Tensor id, used only in the printed report
- * @return true when no element differs, false otherwise
- */
-template <typename T>
-bool TfLiteInterpMatchApp::compareSingleTensorView(const nnfw::tflite::TensorView<T> &expected,
-                                                   const nnfw::tflite::TensorView<T> &obtained,
-                                                   int id) const
-{
-  std::vector<nnfw::misc::tensor::Diff<T>> diffs;
-  assert(expected.shape() == obtained.shape());
-
-  using nnfw::misc::tensor::zip;
-  using nnfw::misc::tensor::Index;
-
-  // Collect every position whose values are not exactly equal
-  zip(expected.shape(), expected, obtained)
-      << [&](const Index &index, T expected_value, T obtained_value) {
-           if (expected_value != obtained_value)
-           {
-             diffs.emplace_back(index, expected_value, obtained_value);
-           }
-         };
-
-  // TODO Unify summary generation code
-  if (diffs.empty())
-  {
-    std::cout << " Tensor #" << id << ": MATCHED" << std::endl;
-    return true;
-  }
-
-  std::cout << " Tensor #" << id << ": UNMATCHED" << std::endl;
-  std::cout << " " << diffs.size() << " diffs are detected" << std::endl;
-
-  if (_verbose != 0)
-  {
-    std::cout << " ---- Details ---" << std::endl;
-    for (const auto &diff : diffs)
-    {
-      std::cout << " Diff at [" << nnfw::misc::tensor::IndexFormatter(diff.index) << "]"
-                << std::endl;
-      // Unary plus promotes narrow integer types to int, so a uint8_t element is printed
-      // as a number instead of as a raw character. int32_t and bool output is unchanged.
-      std::cout << " expected: " << +diff.expected << std::endl;
-      std::cout << " obtained: " << +diff.obtained << std::endl;
-    }
-  }
-
-  return false;
-}
-
-/**
- * @brief Float specialization: compares through _comparator and reports the largest
- *        absolute and relative differences collected by a DiffSummary observer
- * @param[in] expected Reference tensor view
- * @param[in] obtained Tensor view under test (asserted to have the same shape)
- * @param[in] id       Tensor id, used only in the printed report
- * @return true when the comparator reports no difference, false otherwise
- */
-template <>
-bool TfLiteInterpMatchApp::compareSingleTensorView<float>(
-    const nnfw::tflite::TensorView<float> &expected,
-    const nnfw::tflite::TensorView<float> &obtained, int id) const
-{
-  using nnfw::misc::tensor::IndexFormatter;
-
-  DiffSummary summary;
-
-  assert(expected.shape() == obtained.shape());
-  auto diffs = _comparator.compare(expected.shape(), expected, obtained, &summary);
-
-  const bool matched = (diffs.size() == 0);
-
-  // TODO Unify summary generation code
-  if (matched)
-  {
-    std::cout << " Tensor #" << id << ": MATCHED" << std::endl;
-  }
-  else
-  {
-    std::cout << " Tensor #" << id << ": UNMATCHED" << std::endl;
-    std::cout << " " << diffs.size() << " diffs are detected" << std::endl;
-  }
-
-  // Print out max_diff (printed whenever a non-zero difference was observed)
-  if (summary.max_abs_diff_value > 0)
-  {
-    std::cout << " Max absolute diff at [" << IndexFormatter(summary.max_abs_diff_index) << "]"
-              << std::endl;
-    std::cout << " expected: " << summary.max_abs_diff_expected << std::endl;
-    std::cout << " obtained: " << summary.max_abs_diff_obtained << std::endl;
-    std::cout << " absolute diff: " << summary.max_abs_diff_value << std::endl;
-  }
-
-  if (summary.max_rel_diff_value > 0)
-  {
-    // Relative difference expressed in units of FLT_EPSILON
-    const auto tolerance_level = summary.max_rel_diff_value / FLT_EPSILON;
-
-    std::cout << " Max relative diff at [" << IndexFormatter(summary.max_rel_diff_index) << "]"
-              << std::endl;
-    std::cout << " expected: " << summary.max_rel_diff_expected << std::endl;
-    std::cout << " obtained: " << summary.max_rel_diff_obtained << std::endl;
-    std::cout << " relative diff: " << summary.max_rel_diff_value << std::endl;
-    std::cout << " (tolerance level = " << tolerance_level << ")" << std::endl;
-  }
-
-  // Per-element listing only for mismatching tensors in verbose mode
-  if (!matched && _verbose != 0)
-  {
-    std::cout << " ---- Details ---" << std::endl;
-    for (const auto &diff : diffs)
-    {
-      const auto absolute_diff = std::fabs(diff.expected - diff.obtained);
-      const auto relative_diff = nnfw::misc::fp32::relative_diff(diff.expected, diff.obtained);
-      const auto tolerance_level = relative_diff / FLT_EPSILON;
-
-      std::cout << " Diff at [" << IndexFormatter(diff.index) << "]" << std::endl;
-      std::cout << " expected: " << diff.expected << std::endl;
-      std::cout << " obtained: " << diff.obtained << std::endl;
-      std::cout << " absolute diff: " << absolute_diff << std::endl;
-      std::cout << " relative diff: " << relative_diff << std::endl;
-      std::cout << " (tolerance level = " << tolerance_level << ")" << std::endl;
-    }
-  }
-
-  return matched;
-}
-
-#include <map>
-
-/**
- * @brief Compare every output tensor of two interpreters
- * @param[in] interp Interpreter whose outputs are treated as the expected values
- * @param[in] nnapi  Interpreter whose outputs are treated as the obtained values
- * @return true when all output tensors match, false if at least one does not
- * @throws std::runtime_error when an output tensor has a type other than
- *         UInt8, Int32, Float32 or Bool
- */
-bool TfLiteInterpMatchApp::run(::tflite::Interpreter &interp, ::tflite::Interpreter &nnapi) const
-{
-  assert(interp.outputs() == nnapi.outputs());
-
-  bool all_matched = true;
-
-  for (const auto &id : interp.outputs())
-  {
-    assert(interp.tensor(id)->type == nnapi.tensor(id)->type);
-
-    bool matched = false;
-
-    // Dispatch on the element type to pick the matching TensorView instantiation
-    switch (interp.tensor(id)->type)
-    {
-      case kTfLiteUInt8:
-      {
-        const auto expected = nnfw::tflite::TensorView<uint8_t>::make(interp, id);
-        const auto obtained = nnfw::tflite::TensorView<uint8_t>::make(nnapi, id);
-        matched = compareSingleTensorView(expected, obtained, id);
-        break;
-      }
-      case kTfLiteInt32:
-      {
-        const auto expected = nnfw::tflite::TensorView<int32_t>::make(interp, id);
-        const auto obtained = nnfw::tflite::TensorView<int32_t>::make(nnapi, id);
-        matched = compareSingleTensorView(expected, obtained, id);
-        break;
-      }
-      case kTfLiteFloat32:
-      {
-        const auto expected = nnfw::tflite::TensorView<float>::make(interp, id);
-        const auto obtained = nnfw::tflite::TensorView<float>::make(nnapi, id);
-        matched = compareSingleTensorView(expected, obtained, id);
-        break;
-      }
-      case kTfLiteBool:
-      {
-        const auto expected = nnfw::tflite::TensorView<bool>::make(interp, id);
-        const auto obtained = nnfw::tflite::TensorView<bool>::make(nnapi, id);
-        matched = compareSingleTensorView(expected, obtained, id);
-        break;
-      }
-      default:
-        throw std::runtime_error{"Not supported output type"};
-    }
-
-    // Keep going after a mismatch so that every output tensor gets reported
-    if (!matched)
-    {
-      all_matched = false;
-    }
-  }
-
-  return all_matched;
-}
-
-#include "misc/tensor/Object.h"
-
-using namespace std::placeholders;
-
-// Maps one sample of _dist from the nominal range [-5.0, 5.0] linearly onto [0, 255].
-// Samples outside the nominal range saturate to 0 or 255.
-template <> uint8_t RandomGenerator::generate<uint8_t>(void)
-{
-  // The value of type_range is 255.
-  const float type_range = static_cast<float>(std::numeric_limits<uint8_t>::max()) -
-                           static_cast<float>(std::numeric_limits<uint8_t>::min());
-  // Most _dist values range from -5.0 to 5.0.
-  const float min_range = -5.0f;
-  const float max_range = 5.0f;
-
-  const auto scaled = (_dist(_rand) - min_range) * type_range / (max_range - min_range);
-
-  // "Most" is not "all": a sample outside [min_range, max_range] scales to a value outside
-  // [0, 255], and converting such a floating-point value to uint8_t is undefined behavior.
-  // Saturate first. The negated comparison also routes NaN to the lower bound.
-  if (!(scaled > 0))
-  {
-    return std::numeric_limits<uint8_t>::min();
-  }
-  if (scaled >= type_range)
-  {
-    return std::numeric_limits<uint8_t>::max();
-  }
-
-  return static_cast<uint8_t>(scaled);
-}
-
-#include "tflite/TensorLogger.h"
-//
-// Random Test Runner
-//
-// Builds the model twice with `builder`, feeds both interpreters identical inputs, runs one
-// without NNAPI and the other through NNAPI, then compares all output tensors.
-//
-// The NNAPI side uses the interpreter's own NNAPI path when the environment variable
-// UPSTREAM_DELEGATE is exactly "1"; otherwise it uses nnfw::tflite::NNAPIDelegate.
-//
-// Returns 0 when every output matches and 255 otherwise.
-// Throws std::runtime_error for unsupported input/output tensor types and when the nnfw
-// delegate fails to build the graph or to invoke it.
-int RandomTestRunner::run(const nnfw::tflite::Builder &builder)
-{
-  auto tfl_interp = builder.build();
-  auto nnapi = builder.build();
-
-  tfl_interp->UseNNAPI(false);
-
-  // Allocate Tensors
-  tfl_interp->AllocateTensors();
-  nnapi->AllocateTensors();
-
-  assert(tfl_interp->inputs() == nnapi->inputs());
-
-  using ::tflite::Interpreter;
-  using Initializer = std::function<void(int id, Interpreter *, Interpreter *)>;
-
-  // initializers fill input tensors, reseters clear output tensors; both are keyed by type
-  std::map<TfLiteType, Initializer> initializers;
-  std::map<TfLiteType, Initializer> reseters;
-
-  // Fill signed 32-bit integer (s32) input with 0, 1, 2, ... in iteration order
-  initializers[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteInt32);
-    assert(nnapi->tensor(id)->type == kTfLiteInt32);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    int32_t value = 0;
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             // TODO Generate random values
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-             ++value;
-           };
-  };
-
-  // Reset signed 32-bit integer (s32) output to 0
-  reseters[kTfLiteInt32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteInt32);
-    assert(nnapi->tensor(id)->type == kTfLiteInt32);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<int32_t>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<int32_t>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    int32_t value = 0;
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Fill unsigned 8-bit integer (u8) input with generated values
-  initializers[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteUInt8);
-    assert(nnapi->tensor(id)->type == kTfLiteUInt8);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    // The cast selects the (Shape, Index) overload of generate<uint8_t>
-    auto fp = static_cast<uint8_t (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
-                                                       const ::nnfw::misc::tensor::Index &)>(
-        &RandomGenerator::generate<uint8_t>);
-    const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
-                                                   std::bind(fp, _randgen, _1, _2));
-    assert(tfl_interp_view.shape() == data.shape());
-
-    // Both interpreters receive exactly the same values
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             const auto value = data.at(ind);
-
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Reset unsigned 8-bit integer (u8) output to 0
-  reseters[kTfLiteUInt8] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteUInt8);
-    assert(nnapi->tensor(id)->type == kTfLiteUInt8);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<uint8_t>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<uint8_t>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    // NOTE(review): `data` is only consumed by the assert below; it looks removable, but
-    // confirm that constructing it has no effect anything relies on.
-    auto fp = static_cast<uint8_t (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
-                                                       const ::nnfw::misc::tensor::Index &)>(
-        &RandomGenerator::generate<uint8_t>);
-    const nnfw::misc::tensor::Object<uint8_t> data(tfl_interp_view.shape(),
-                                                   std::bind(fp, _randgen, _1, _2));
-    assert(tfl_interp_view.shape() == data.shape());
-
-    uint8_t value = 0;
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Fill 32-bit floating point (f32) input with generated values
-  initializers[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteFloat32);
-    assert(nnapi->tensor(id)->type == kTfLiteFloat32);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    auto fp = static_cast<float (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
-                                                     const ::nnfw::misc::tensor::Index &)>(
-        &RandomGenerator::generate<float>);
-    const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
-                                                 std::bind(fp, _randgen, _1, _2));
-
-    assert(tfl_interp_view.shape() == data.shape());
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             const auto value = data.at(ind);
-
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Reset 32-bit floating point (f32) output to 0
-  reseters[kTfLiteFloat32] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteFloat32);
-    assert(nnapi->tensor(id)->type == kTfLiteFloat32);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<float>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<float>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    // NOTE(review): `data` is only consumed by the assert below; see the u8 reseter.
-    auto fp = static_cast<float (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
-                                                     const ::nnfw::misc::tensor::Index &)>(
-        &RandomGenerator::generate<float>);
-    const nnfw::misc::tensor::Object<float> data(tfl_interp_view.shape(),
-                                                 std::bind(fp, _randgen, _1, _2));
-
-    assert(tfl_interp_view.shape() == data.shape());
-
-    float value = 0;
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Fill boolean input with generated values
-  initializers[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteBool);
-    assert(nnapi->tensor(id)->type == kTfLiteBool);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
-                                                    const ::nnfw::misc::tensor::Index &)>(
-        &RandomGenerator::generate<bool>);
-    const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
-                                                std::bind(fp, _randgen, _1, _2));
-
-    assert(tfl_interp_view.shape() == data.shape());
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             const auto value = data.at(ind);
-
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Reset boolean output to false
-  reseters[kTfLiteBool] = [&](int id, Interpreter *tfl_interp, Interpreter *nnapi) {
-    assert(tfl_interp->tensor(id)->type == kTfLiteBool);
-    assert(nnapi->tensor(id)->type == kTfLiteBool);
-
-    auto tfl_interp_view = nnfw::tflite::TensorView<bool>::make(*tfl_interp, id);
-    auto nnapi_view = nnfw::tflite::TensorView<bool>::make(*nnapi, id);
-
-    assert(tfl_interp_view.shape() == nnapi_view.shape());
-
-    // NOTE(review): `data` is only consumed by the assert below; see the u8 reseter.
-    auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
-                                                    const ::nnfw::misc::tensor::Index &)>(
-        &RandomGenerator::generate<bool>);
-    const nnfw::misc::tensor::Object<bool> data(tfl_interp_view.shape(),
-                                                std::bind(fp, _randgen, _1, _2));
-
-    assert(tfl_interp_view.shape() == data.shape());
-
-    bool value = false;
-
-    nnfw::misc::tensor::iterate(tfl_interp_view.shape())
-        << [&](const nnfw::misc::tensor::Index &ind) {
-             tfl_interp_view.at(ind) = value;
-             nnapi_view.at(ind) = value;
-           };
-  };
-
-  // Fill IFM with random numbers
-  for (const auto id : tfl_interp->inputs())
-  {
-    assert(tfl_interp->tensor(id)->type == nnapi->tensor(id)->type);
-
-    auto it = initializers.find(tfl_interp->tensor(id)->type);
-
-    if (it == initializers.end())
-    {
-      throw std::runtime_error{"Not supported input type"};
-    }
-
-    it->second(id, tfl_interp.get(), nnapi.get());
-  }
-
-  // Fill OFM with 0
-  for (const auto id : tfl_interp->outputs())
-  {
-    assert(tfl_interp->tensor(id)->type == nnapi->tensor(id)->type);
-
-    auto it = reseters.find(tfl_interp->tensor(id)->type);
-
-    if (it == reseters.end())
-    {
-      // This loop walks output tensors, so report an output type
-      throw std::runtime_error{"Not supported output type"};
-    }
-
-    it->second(id, tfl_interp.get(), nnapi.get());
-  }
-
-  std::cout << "[NNAPI TEST] Run T/F Lite Interpreter without NNAPI" << std::endl;
-  tfl_interp->Invoke();
-
-  std::cout << "[NNAPI TEST] Run T/F Lite Interpreter with NNAPI" << std::endl;
-
-  char *env = getenv("UPSTREAM_DELEGATE");
-
-  // compare() returns 0 on equality, so this branch is taken only for exactly "1"
-  if (env && !std::string(env).compare("1"))
-  {
-    nnapi->UseNNAPI(true);
-    nnapi->Invoke();
-  }
-  else
-  {
-    nnfw::tflite::NNAPIDelegate d;
-
-    // A non-zero (truthy) status from the delegate is treated as failure
-    if (d.BuildGraph(nnapi.get()))
-    {
-      throw std::runtime_error{"Failed to BuildGraph"};
-    }
-
-    if (d.Invoke(nnapi.get()))
-    {
-      throw std::runtime_error{"Failed to Invoke"};
-    }
-  }
-
-  // Compare OFM
-  std::cout << "[NNAPI TEST] Compare the result" << std::endl;
-
-  const auto tolerance = _param.tolerance;
-
-  auto equals = [tolerance](float lhs, float rhs) {
-    // NOTE Hybrid approach
-    // TODO Allow users to set tolerance for absolute_epsilon_equal
-    if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
-    {
-      return true;
-    }
-
-    return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
-  };
-
-  nnfw::misc::tensor::Comparator comparator(equals);
-  TfLiteInterpMatchApp app(comparator);
-
-  app.verbose() = _param.verbose;
-
-  bool res = app.run(*tfl_interp, *nnapi);
-
-  if (!res)
-  {
-    return 255;
-  }
-
-  std::cout << "[NNAPI TEST] PASSED" << std::endl;
-
-  // Tensor logging happens only after a successful comparison
-  if (_param.tensor_logging)
-    nnfw::tflite::TensorLogger::instance().save(_param.log_path, *tfl_interp);
-
-  return 0;
-}
-
-// Creates a runner for `seed` with verbose = 0 and tolerance = 1, after giving the
-// VERBOSE and TOLERANCE environment accessors a chance to update those two fields.
-RandomTestRunner RandomTestRunner::make(int seed)
-{
-  RandomTestParam param;
-
-  // Verbosity: default first, then the VERBOSE accessor
-  param.verbose = 0;
-  nnfw::misc::env::IntAccessor("VERBOSE").access(param.verbose);
-
-  // Tolerance: default first, then the TOLERANCE accessor
-  param.tolerance = 1;
-  nnfw::misc::env::IntAccessor("TOLERANCE").access(param.tolerance);
-
-  return RandomTestRunner{seed, param};
-}
diff --git a/libs/tflite/src/FeatureView.cpp b/libs/tflite/src/FeatureView.cpp
deleted file mode 100644
index fdf5a4b00..000000000
--- a/libs/tflite/src/FeatureView.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/FeatureView.h"
-#include "tflite/TensorUtils.h"
-
-#include <cassert>
-
-namespace nnfw
-{
-namespace tflite
-{
-
-// Builds a feature shape from a tensor's dims, passing dims[3], dims[1], dims[2] in that order.
-// NOTE(review): presumably the tensor is rank-4 NHWC and the shape is (depth, height, width)
-// - confirm against nnfw::misc::feature::Shape.
-nnfw::misc::feature::Shape getFeatureShape(const TfLiteTensor *tensor)
-{
-  const auto *dims = tensor->dims->data;
-
-  return nnfw::misc::feature::Shape{dims[3], dims[1], dims[2]};
-}
-
-// Creates a view over the interpreter input selected by `index`
-// (a position in interp.inputs(), not a tensor index).
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const InputIndex &index)
-{
-  // Translate the position in the input list into the interpreter's tensor index
-  const auto tensor_index = interp.inputs().at(index.asInt());
-  auto tensor_ptr = interp.tensor(tensor_index);
-
-  // Only float feature tensors are supported (checked in debug builds only)
-  assert(isFloatTensor(tensor_ptr));
-  assert(isFeatureTensor(tensor_ptr));
-
-  _base = interp.typed_tensor<float>(tensor_index);
-  _shape = getFeatureShape(tensor_ptr);
-}
-
-// Creates a view over the interpreter output selected by `index`
-// (a position in interp.outputs(), not a tensor index).
-FeatureView<float>::FeatureView(::tflite::Interpreter &interp, const OutputIndex &index)
-{
-  // Translate the position in the output list into the interpreter's tensor index
-  const auto tensor_index = interp.outputs().at(index.asInt());
-  auto tensor_ptr = interp.tensor(tensor_index);
-
-  // Only float feature tensors are supported (checked in debug builds only)
-  assert(isFloatTensor(tensor_ptr));
-  assert(isFeatureTensor(tensor_ptr));
-
-  _base = interp.typed_tensor<float>(tensor_index);
-  _shape = getFeatureShape(tensor_ptr);
-}
-
-// Reads the element at (ch, row, col); the offset comes from getElementOffset.
-float FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col) const
-{
-  const auto offset = getElementOffset(ch, row, col);
-  return _base[offset];
-}
-
-// Returns a writable reference to the element at (ch, row, col).
-float &FeatureView<float>::at(uint32_t ch, uint32_t row, uint32_t col)
-{
-  const auto offset = getElementOffset(ch, row, col);
-  return _base[offset];
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/Quantization.cpp b/libs/tflite/src/Quantization.cpp
deleted file mode 100644
index 9c162c342..000000000
--- a/libs/tflite/src/Quantization.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/Quantization.h"
-
-// Returns quantization parameters initialized with {0.0f, 0}.
-TfLiteQuantizationParams make_default_quantization(void)
-{
-  const TfLiteQuantizationParams params{0.0f, 0};
-  return params;
-}
diff --git a/libs/tflite/src/TensorShapeUtils.cpp b/libs/tflite/src/TensorShapeUtils.cpp
deleted file mode 100644
index b5d906719..000000000
--- a/libs/tflite/src/TensorShapeUtils.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-#include "tflite/TensorShapeUtils.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-// Computes the broadcast shape of two shapes: the lower-rank shape is padded with leading
-// size-1 axes, then each output dimension is the larger of the two aligned dimensions.
-// No compatibility check is done; mismatching dimensions simply resolve to the larger one.
-nnfw::misc::tensor::Shape broadcast(const nnfw::misc::tensor::Shape &lhs_shape,
-                                    const nnfw::misc::tensor::Shape &rhs_shape)
-{
-  const uint32_t lhs_rank = lhs_shape.rank();
-  const uint32_t rhs_rank = rhs_shape.rank();
-  const uint32_t out_rank = std::max(lhs_rank, rhs_rank);
-
-  // Number of implicit leading size-1 axes on each side
-  const uint32_t lhs_pad = out_rank - lhs_rank;
-  const uint32_t rhs_pad = out_rank - rhs_rank;
-
-  nnfw::misc::tensor::Shape out_shape(out_rank);
-
-  for (uint32_t axis = 0; axis < out_rank; ++axis)
-  {
-    const int32_t lhs_dim = (axis < lhs_pad) ? 1 : lhs_shape.dim(axis - lhs_pad);
-    const int32_t rhs_dim = (axis < rhs_pad) ? 1 : rhs_shape.dim(axis - rhs_pad);
-
-    out_shape.dim(axis) = std::max(lhs_dim, rhs_dim);
-  }
-
-  return out_shape;
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/TensorView.test.cpp b/libs/tflite/src/TensorView.test.cpp
deleted file mode 100644
index c710b3c33..000000000
--- a/libs/tflite/src/TensorView.test.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/TensorView.h"
-
-#include <cassert>
-
-// Checks element access of a 2x3 TensorView<int> laid over a local array: the asserts expect
-// index {r, c} to map to the (3 * r + c)-th array element. Relies on assert, so it checks
-// nothing when NDEBUG is defined.
-void int_test(void)
-{
-  using nnfw::misc::tensor::Index;
-
-  int storage[6] = {1, 2, 3, 4, 5, 6};
-
-  const nnfw::misc::tensor::Shape dims{2, 3};
-  const nnfw::tflite::TensorView<int> view{dims, storage};
-
-  // Row 0
-  assert(view.at(Index{0, 0}) == 1);
-  assert(view.at(Index{0, 1}) == 2);
-  assert(view.at(Index{0, 2}) == 3);
-  // Row 1
-  assert(view.at(Index{1, 0}) == 4);
-  assert(view.at(Index{1, 1}) == 5);
-  assert(view.at(Index{1, 2}) == 6);
-}
-
-// Test entry point: runs the same element-access checks for TensorView<float>, then the
-// int variant. Relies on assert, so it checks nothing when NDEBUG is defined.
-int main(int argc, char **argv)
-{
-  using nnfw::misc::tensor::Index;
-
-  float storage[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
-
-  const nnfw::misc::tensor::Shape dims{2, 3};
-  const nnfw::tflite::TensorView<float> view{dims, storage};
-
-  // Row 0
-  assert(view.at(Index{0, 0}) == 1.0f);
-  assert(view.at(Index{0, 1}) == 2.0f);
-  assert(view.at(Index{0, 2}) == 3.0f);
-  // Row 1
-  assert(view.at(Index{1, 0}) == 4.0f);
-  assert(view.at(Index{1, 1}) == 5.0f);
-  assert(view.at(Index{1, 2}) == 6.0f);
-
-  int_test();
-
-  return 0;
-}
diff --git a/libs/tflite/src/ext/kernels/Abs.cpp b/libs/tflite/src/ext/kernels/Abs.cpp
deleted file mode 100644
index 7e9c2338d..000000000
--- a/libs/tflite/src/ext/kernels/Abs.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/Abs.h"
-#include "tensorflow/contrib/lite/kernels/kernel_util.h"
-
-#include <iostream>
-#include <cmath>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace Abs
-{
-
-// Init hook: Abs keeps no per-node state, so nothing is allocated.
-void *InitAbs(TfLiteContext *context, const char *buffer, size_t length)
-{
-  return nullptr;
-}
-
-// Free hook: InitAbs returns nullptr, so there is nothing to release.
-void FreeAbs(TfLiteContext *context, void *buffer)
-{
-  // DO NOTHING
-}
-
-// Prepare hook: requires one input and one output of the same element type, then resizes
-// the output to a copy of the input's dimensions.
-TfLiteStatus PrepareAbs(TfLiteContext *context, TfLiteNode *node)
-{
-  TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 1);
-  TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
-  const TfLiteTensor *input = ::tflite::GetInput(context, node, 0);
-  TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-
-  TF_LITE_ENSURE_EQ(context, input->type, output->type);
-
-  // ResizeTensor is handed a fresh copy of the dims array
-  TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input->dims);
-
-  return context->ResizeTensor(context, output, output_dims);
-}
-
-// Writes std::abs(in[i]) to out[i] for every i in [0, count).
-template <typename T> static void AbsElements(const T *in, size_t count, T *out)
-{
-  for (const T *const in_end = in + count; in < in_end; ++in, ++out)
-    *out = std::abs(*in);
-}
-
-// Eval hook: element-wise absolute value for float32, int32, int64 and uint8 tensors.
-// Any other input type is reported through the context and yields kTfLiteError.
-// The element count is taken from the input tensor.
-TfLiteStatus EvalAbs(TfLiteContext *context, TfLiteNode *node)
-{
-  const TfLiteTensor *input = ::tflite::GetInput(context, node, 0);
-  TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-  const size_t elements = ::tflite::NumElements(input);
-
-  switch (input->type)
-  {
-    case kTfLiteFloat32:
-      AbsElements(input->data.f, elements, output->data.f);
-      return kTfLiteOk;
-    case kTfLiteInt32:
-      AbsElements(input->data.i32, elements, output->data.i32);
-      return kTfLiteOk;
-    case kTfLiteInt64:
-      AbsElements(input->data.i64, elements, output->data.i64);
-      return kTfLiteOk;
-    case kTfLiteUInt8:
-      // Values are unsigned, so this amounts to a copy
-      AbsElements(input->data.uint8, elements, output->data.uint8);
-      return kTfLiteOk;
-    default:
-      context->ReportError(context, "Input type %d is not supported", input->type);
-      return kTfLiteError;
-  }
-}
-
-} // namespace Abs
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/ext/kernels/SquaredDifference.cpp b/libs/tflite/src/ext/kernels/SquaredDifference.cpp
deleted file mode 100644
index 8ac2b1de0..000000000
--- a/libs/tflite/src/ext/kernels/SquaredDifference.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/SquaredDifference.h"
-#include "tensorflow/contrib/lite/kernels/kernel_util.h"
-
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace SquaredDifference
-{
-
-// Init hook: SquaredDifference keeps no per-node state, so nothing is allocated.
-void *InitSquaredDifference(TfLiteContext *context, const char *buffer, size_t length)
-{
-  void *const no_state = nullptr;
-  return no_state;
-}
-
-// Free hook: InitSquaredDifference returns nullptr, so there is nothing to release.
-void FreeSquaredDifference(TfLiteContext *context, void *buffer)
-{
-  // DO NOTHING
-}
-
-// Prepare hook: requires two inputs and one output that all share one element type, then
-// resizes the output to a copy of the first input's dimensions.
-// NOTE(review): only element types are compared here; the shapes of the two inputs are not
-// checked against each other - confirm callers guarantee equal shapes.
-TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node)
-{
-  TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2);
-  TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
-  const TfLiteTensor *input1 = ::tflite::GetInput(context, node, 0);
-  const TfLiteTensor *input2 = ::tflite::GetInput(context, node, 1);
-  TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-
-  TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
-  TF_LITE_ENSURE_EQ(context, input1->type, output->type);
-
-  // ResizeTensor is handed a fresh copy of the dims array
-  TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input1->dims);
-
-  return context->ResizeTensor(context, output, output_dims);
-}
-
-// Eval hook: writes (input1[i] - input2[i])^2 to output[i] for float32, int32 and int64
-// tensors. Any other input type is reported through the context and yields kTfLiteError.
-// The element count is taken from the first input.
-TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node)
-{
-  const TfLiteTensor *input1 = ::tflite::GetInput(context, node, 0);
-  const TfLiteTensor *input2 = ::tflite::GetInput(context, node, 1);
-
-  TfLiteTensor *output = ::tflite::GetOutput(context, node, 0);
-
-  size_t elements = ::tflite::NumElements(input1);
-
-  switch (input1->type)
-  {
-    case kTfLiteFloat32:
-    {
-      const float *in1 = input1->data.f;
-      const float *in2 = input2->data.f;
-      const float *in_end1 = in1 + elements;
-      float *out = output->data.f;
-
-      for (; in1 < in_end1; in1++, in2++, out++)
-        *out = ((*in1 - *in2) * (*in1 - *in2));
-
-      return kTfLiteOk;
-    }
-    case kTfLiteInt32:
-    {
-      const int *in1 = input1->data.i32;
-      const int *in2 = input2->data.i32;
-      const int *in_end1 = in1 + elements;
-      int *out = output->data.i32;
-
-      for (; in1 < in_end1; in1++, in2++, out++)
-        *out = ((*in1 - *in2) * (*in1 - *in2));
-
-      return kTfLiteOk;
-    }
-    case kTfLiteInt64:
-    {
-      const int64_t *in1 = input1->data.i64;
-      // The second operand must come from input2; reading it from input1 made every
-      // int64 result zero.
-      const int64_t *in2 = input2->data.i64;
-      const int64_t *in_end1 = in1 + elements;
-      int64_t *out = output->data.i64;
-
-      for (; in1 < in_end1; in1++, in2++, out++)
-        *out = ((*in1 - *in2) * (*in1 - *in2));
-
-      return kTfLiteOk;
-    }
-    default:
-    {
-      context->ReportError(context, "InputType is %d Unsupported", input1->type);
-      return kTfLiteError;
-    }
-  }
-}
-
-} // namespace SquaredDifference
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/ext/kernels/TensorFlowMax.cpp b/libs/tflite/src/ext/kernels/TensorFlowMax.cpp
deleted file mode 100644
index d72ad242c..000000000
--- a/libs/tflite/src/ext/kernels/TensorFlowMax.cpp
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/TensorFlowMax.h"
-#include "tensorflow/contrib/lite/kernels/kernel_util.h"
-
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowMax
-{
-
-struct TensorFlowMaxOp
-{
- TensorFlowMaxOp(TfLiteContext *context, TfLiteNode *node)
- {
- input = ::tflite::GetInput(context, node, 0);
- axis = ::tflite::GetInput(context, node, 1);
- output = ::tflite::GetOutput(context, node, 0);
- }
- const TfLiteTensor *input;
- const TfLiteTensor *axis;
- TfLiteTensor *output;
-};
-
-void *InitTensorFlowMax(TfLiteContext *context, const char *buffer, size_t length)
-{
- // Creates two temp tensors to store index and axis for internal
- // implementation only.
- auto *scratch_tensor_index = new int;
- context->AddTensors(context, 2, scratch_tensor_index);
- return scratch_tensor_index;
-}
-
-void FreeTensorFlowMax(TfLiteContext *context, void *buffer)
-{
- delete static_cast<TensorFlowMaxOp *>(buffer);
-}
-
-// Resizes the temp tensor that stores resolved axis.
-TfLiteStatus ResizeTempAxis(TfLiteContext *context, TensorFlowMaxOp *op_context,
- TfLiteTensor *resolved_axis)
-{
- TfLiteIntArray *axis_size = TfLiteIntArrayCreate(1);
- axis_size->data[0] = static_cast<int>(::tflite::NumElements(op_context->axis));
- return context->ResizeTensor(context, resolved_axis, axis_size);
-}
-
-// Resizes output array based on the input size and resolved axis.
-TfLiteStatus ResizeOutputTensor(TfLiteContext *context, TensorFlowMaxOp *op_context)
-{
- size_t num_axis = ::tflite::NumElements(op_context->axis);
- TfLiteIntArray *input_dims = op_context->input->dims;
- int input_num_dims = ::tflite::NumDimensions(op_context->input);
- const int *axis = op_context->axis->data.i32;
-
- {
- // Calculates size of reducing axis.
- int num_reduce_axis = num_axis;
- for (int i = 0; i < num_axis; ++i)
- {
- int current = axis[i];
- if (current < 0)
- {
- current += input_num_dims;
- }
- TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims);
- for (int j = 0; j < i; ++j)
- {
- int previous = axis[j];
- if (previous < 0)
- {
- previous += input_num_dims;
- }
- if (current == previous)
- {
- --num_reduce_axis;
- break;
- }
- }
- }
- // Determines output dimensions.
- int output_num_dims = ::tflite::NumDimensions(op_context->output);
- TF_LITE_ENSURE(context, (input_num_dims == output_num_dims) ||
- (input_num_dims - num_reduce_axis == output_num_dims));
-
- if (input_num_dims == output_num_dims)
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input_dims);
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- int current = axis[axis_idx];
- output_dims->data[current] = 1;
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- else
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCreate(output_num_dims);
- int num_skip_axis = 0;
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- bool is_axis = false;
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx)
- {
- ++num_skip_axis;
- is_axis = true;
- break;
- }
- }
- if (!is_axis)
- {
- output_dims->data[idx - num_skip_axis] = input_dims->data[idx];
- }
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- }
-}
-
-// Initializes temp tensors to store index and resolved axis.
-TfLiteStatus InitializeTemporaries(TfLiteContext *context, TfLiteNode *node,
- TensorFlowMaxOp *op_context)
-{
- // Creates a temp index to iterate through input data.
- int *scratch_tensor_index = reinterpret_cast<int *>(node->user_data);
- TfLiteIntArrayFree(node->temporaries);
- node->temporaries = TfLiteIntArrayCreate(2);
- node->temporaries->data[0] = *scratch_tensor_index;
- TfLiteTensor *scratch_tensor = &context->tensors[node->temporaries->data[0]];
- scratch_tensor->type = kTfLiteInt32;
- scratch_tensor->allocation_type = kTfLiteArenaRw;
- TfLiteIntArray *index_size = TfLiteIntArrayCreate(1);
- index_size->data[0] = ::tflite::NumDimensions(op_context->input);
- TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, index_size));
-
- // Creates a temp tensor to store resolved axis given input data.
- node->temporaries->data[1] = *scratch_tensor_index + 1;
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- resolved_axis->type = kTfLiteInt32;
- return kTfLiteOk;
-}
-
-TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node)
-{
- TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2);
- TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
- TensorFlowMaxOp op_context(context, node);
- TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context));
-
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Leaves work to Eval if axis is not constant; else resizes output.
- if (!::tflite::IsConstantTensor(op_context.axis))
- {
- ::tflite::SetTensorToDynamic(op_context.output);
- ::tflite::SetTensorToDynamic(resolved_axis);
- return kTfLiteOk;
- }
- resolved_axis->allocation_type = kTfLiteArenaRw;
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- return ResizeOutputTensor(context, &op_context);
-}
-
-// Gets offset of index if expanded on axis. When expanded, the flattened offset
-// will not change, if the output index changes on the given axis. For example,
-// if you have a 2D tensor and you are expanding to 3D on axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map from the same flattened
-// offset.
-inline size_t ExpandedInputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- int out_idx = 0;
- for (int in_idx = 0; in_idx < num_dims; ++in_idx)
- {
- // if we need to expand this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (in_idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[in_idx]) + static_cast<size_t>(index[out_idx]);
- out_idx++;
- }
- else
- {
- offset = offset * static_cast<size_t>(dims[in_idx]);
- }
- }
- return offset;
-}
-
-// Gets offset of index if reducing on axis. When reducing, the flattened offset
-// will not change, if the input index changes on the given axis. For example,
-// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
-// offset.
-// TODO(kanlig): uses Dims to represent dimensions.
-inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- for (int idx = 0; idx < num_dims; ++idx)
- {
- // if we need to skip this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
- }
- }
- return offset;
-}
-
-// Gets next index to iterate through a multidimensional array.
-inline bool NextIndex(TfLiteContext *context, const int num_dims, const int *dims, int *current)
-{
- int carry = 1;
- for (int idx = num_dims - 1; idx >= 0; --idx)
- {
- int current_val = current[idx] + carry;
- TF_LITE_ENSURE(context, (dims[idx] >= current_val));
- if (dims[idx] == current_val)
- {
- current[idx] = 0;
- }
- else
- {
- current[idx] = current_val;
- carry = 0;
- break;
- }
- }
- return (carry == 0);
-}
-
-template <typename T>
-inline TfLiteStatus
-CustomMax(TfLiteContext *context, T *input_data, const int *input_dims, const int input_num_dims,
- T *output_data, const int *output_dims, const int output_num_dims, const int *axis,
- const int num_axis_dimensions, bool keep_dims, int *temp_index, int *resolved_axis)
-{
- // resolves axis.
- int num_resolved_axis = 0;
- for (int idx = 0; idx < num_axis_dimensions; ++idx)
- {
- int current = axis[idx];
- TF_LITE_ENSURE(context, (current < input_num_dims && current + input_num_dims >= 0));
- if (current < 0)
- {
- current += input_num_dims;
- }
- bool is_dup = false;
- for (int j = 0; j < num_resolved_axis; ++j)
- {
- if (resolved_axis[j] == current)
- {
- is_dup = true;
- break;
- }
- }
- if (!is_dup)
- {
- resolved_axis[num_resolved_axis++] = current;
- }
- }
-
- TF_LITE_ENSURE(context, (input_num_dims > 0));
- TF_LITE_ENSURE(context, (input_dims != nullptr));
- TF_LITE_ENSURE(context, (temp_index != nullptr));
-
- // resets output data.
- for (int idx = 0; idx < output_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, output_num_dims, output_dims, temp_index))
- {
- size_t output_offset =
- ReducedOutputOffset(output_num_dims, output_dims, temp_index, 0, nullptr);
- size_t input_offset = ExpandedInputOffset(input_num_dims, input_dims, temp_index,
- num_resolved_axis, resolved_axis);
- output_data[output_offset] = input_data[input_offset];
- }
-
- // resets temp index.
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
-
- // iterates through input_data.
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, input_num_dims, input_dims, temp_index))
- {
- size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
- size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index,
- num_resolved_axis, resolved_axis);
- if (output_data[output_offset] < input_data[input_offset])
- {
- output_data[output_offset] = input_data[input_offset];
- }
- }
-
- return kTfLiteOk;
-}
-
-TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node)
-{
-
- TensorFlowMaxOp op_context(context, node);
- int num_axis = static_cast<int>(::tflite::NumElements(op_context.axis));
- TfLiteTensor *temp_index = &context->tensors[node->temporaries->data[0]];
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Resize the output tensor if the output tensor is dynamic.
- if (::tflite::IsDynamicTensor(op_context.output))
- {
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context));
- }
-
- TfLiteStatus returnStatus = kTfLiteOk;
- switch (op_context.input->type)
- {
- case kTfLiteFloat32:
- returnStatus = CustomMax<float>(
- context, op_context.input->data.f, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.f, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt32:
- returnStatus = CustomMax<int>(context, op_context.input->data.i32,
- op_context.input->dims->data, op_context.input->dims->size,
- op_context.output->data.i32, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteUInt8:
- returnStatus = CustomMax<uint8_t>(
- context, op_context.input->data.uint8, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.uint8,
- op_context.output->dims->data, op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt64:
- returnStatus = CustomMax<int64_t>(
- context, op_context.input->data.i64, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.i64, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- default:
- returnStatus = kTfLiteError;
- }
-
- return returnStatus;
-}
-
-} // namespace TensorFlowMax
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/ext/kernels/TensorFlowSum.cpp b/libs/tflite/src/ext/kernels/TensorFlowSum.cpp
deleted file mode 100644
index cbf97970c..000000000
--- a/libs/tflite/src/ext/kernels/TensorFlowSum.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/ext/kernels/TensorFlowSum.h"
-#include "tensorflow/contrib/lite/kernels/kernel_util.h"
-
-#include <iostream>
-
-namespace nnfw
-{
-namespace tflite
-{
-namespace custom
-{
-namespace TensorFlowSum
-{
-
-struct TensorFlowSumOp
-{
- TensorFlowSumOp(TfLiteContext *context, TfLiteNode *node)
- {
- input = ::tflite::GetInput(context, node, 0);
- axis = ::tflite::GetInput(context, node, 1);
- output = ::tflite::GetOutput(context, node, 0);
- }
- const TfLiteTensor *input;
- const TfLiteTensor *axis;
- TfLiteTensor *output;
-};
-
-void *InitTensorFlowSum(TfLiteContext *context, const char *buffer, size_t length)
-{
- // Creates two temp tensors to store index and axis for internal
- // implementation only.
- auto *scratch_tensor_index = new int;
- context->AddTensors(context, 2, scratch_tensor_index);
- return scratch_tensor_index;
-}
-
-void FreeTensorFlowSum(TfLiteContext *context, void *buffer)
-{
- delete static_cast<TensorFlowSumOp *>(buffer);
-}
-
-// Resizes the temp tensor that stores resolved axis.
-TfLiteStatus ResizeTempAxis(TfLiteContext *context, TensorFlowSumOp *op_context,
- TfLiteTensor *resolved_axis)
-{
- TfLiteIntArray *axis_size = TfLiteIntArrayCreate(1);
- axis_size->data[0] = static_cast<int>(::tflite::NumElements(op_context->axis));
- return context->ResizeTensor(context, resolved_axis, axis_size);
-}
-
-// Resizes output array based on the input size and resolved axis.
-TfLiteStatus ResizeOutputTensor(TfLiteContext *context, TensorFlowSumOp *op_context)
-{
- size_t num_axis = ::tflite::NumElements(op_context->axis);
- TfLiteIntArray *input_dims = op_context->input->dims;
- int input_num_dims = ::tflite::NumDimensions(op_context->input);
- const int *axis = op_context->axis->data.i32;
-
- {
- // Calculates size of reducing axis.
- int num_reduce_axis = num_axis;
- for (int i = 0; i < num_axis; ++i)
- {
- int current = axis[i];
- if (current < 0)
- {
- current += input_num_dims;
- }
- TF_LITE_ENSURE(context, current >= 0 && current < input_num_dims);
- for (int j = 0; j < i; ++j)
- {
- int previous = axis[j];
- if (previous < 0)
- {
- previous += input_num_dims;
- }
- if (current == previous)
- {
- --num_reduce_axis;
- break;
- }
- }
- }
- // Determines output dimensions.
- int output_num_dims = ::tflite::NumDimensions(op_context->output);
- TF_LITE_ENSURE(context, (input_num_dims == output_num_dims) ||
- (input_num_dims - num_reduce_axis == output_num_dims));
-
- if (input_num_dims == output_num_dims)
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCopy(input_dims);
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- int current = axis[axis_idx];
- output_dims->data[current] = 1;
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- else
- {
- TfLiteIntArray *output_dims = TfLiteIntArrayCreate(output_num_dims);
- int num_skip_axis = 0;
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- bool is_axis = false;
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (axis[axis_idx] == idx || axis[axis_idx] + input_num_dims == idx)
- {
- ++num_skip_axis;
- is_axis = true;
- break;
- }
- }
- if (!is_axis)
- {
- output_dims->data[idx - num_skip_axis] = input_dims->data[idx];
- }
- }
- return context->ResizeTensor(context, op_context->output, output_dims);
- }
- }
-}
-
-// Initializes temp tensors to store index and resolved axis.
-TfLiteStatus InitializeTemporaries(TfLiteContext *context, TfLiteNode *node,
- TensorFlowSumOp *op_context)
-{
- // Creates a temp index to iterate through input data.
- int *scratch_tensor_index = reinterpret_cast<int *>(node->user_data);
- TfLiteIntArrayFree(node->temporaries);
- node->temporaries = TfLiteIntArrayCreate(2);
- node->temporaries->data[0] = *scratch_tensor_index;
- TfLiteTensor *scratch_tensor = &context->tensors[node->temporaries->data[0]];
- scratch_tensor->type = kTfLiteInt32;
- scratch_tensor->allocation_type = kTfLiteArenaRw;
- TfLiteIntArray *index_size = TfLiteIntArrayCreate(1);
- index_size->data[0] = ::tflite::NumDimensions(op_context->input);
- TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_tensor, index_size));
-
- // Creates a temp tensor to store resolved axis given input data.
- node->temporaries->data[1] = *scratch_tensor_index + 1;
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- resolved_axis->type = kTfLiteInt32;
- return kTfLiteOk;
-}
-
-TfLiteStatus PrepareTensorFlowSum(TfLiteContext *context, TfLiteNode *node)
-{
- TF_LITE_ENSURE_EQ(context, ::tflite::NumInputs(node), 2);
- TF_LITE_ENSURE_EQ(context, ::tflite::NumOutputs(node), 1);
-
- TensorFlowSumOp op_context(context, node);
- TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context));
-
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Leaves work to Eval if axis is not constant; else resizes output.
- if (!::tflite::IsConstantTensor(op_context.axis))
- {
- ::tflite::SetTensorToDynamic(op_context.output);
- ::tflite::SetTensorToDynamic(resolved_axis);
- return kTfLiteOk;
- }
- resolved_axis->allocation_type = kTfLiteArenaRw;
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- return ResizeOutputTensor(context, &op_context);
-}
-
-// Gets offset of index if expanded on axis. When expanded, the flattened offset
-// will not change, if the output index changes on the given axis. For example,
-// if you have a 2D tensor and you are expanding to 3D on axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map from the same flattened
-// offset.
-inline size_t ExpandedInputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- int out_idx = 0;
- for (int in_idx = 0; in_idx < num_dims; ++in_idx)
- {
- // if we need to expand this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (in_idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[in_idx]) + static_cast<size_t>(index[out_idx]);
- out_idx++;
- }
- else
- {
- offset = offset * static_cast<size_t>(dims[in_idx]);
- }
- }
- return offset;
-}
-
-// Gets offset of index if reducing on axis. When reducing, the flattened offset
-// will not change, if the input index changes on the given axis. For example,
-// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0,
-// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened
-// offset.
-// TODO(kanlig): uses Dims to represent dimensions.
-inline size_t ReducedOutputOffset(const int num_dims, const int *dims, const int *index,
- const int num_axis, const int *axis)
-{
- size_t offset = 0;
- for (int idx = 0; idx < num_dims; ++idx)
- {
- // if we need to skip this axis
- bool is_axis = false;
- if (axis != nullptr)
- {
- for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx)
- {
- if (idx == axis[axis_idx])
- {
- is_axis = true;
- break;
- }
- }
- }
- if (!is_axis)
- {
- offset = offset * static_cast<size_t>(dims[idx]) + static_cast<size_t>(index[idx]);
- }
- }
- return offset;
-}
-
-// Gets next index to iterate through a multidimensional array.
-inline bool NextIndex(TfLiteContext *context, const int num_dims, const int *dims, int *current)
-{
- int carry = 1;
- for (int idx = num_dims - 1; idx >= 0; --idx)
- {
- int current_val = current[idx] + carry;
- TF_LITE_ENSURE(context, (dims[idx] >= current_val));
- if (dims[idx] == current_val)
- {
- current[idx] = 0;
- }
- else
- {
- current[idx] = current_val;
- carry = 0;
- break;
- }
- }
- return (carry == 0);
-}
-
-template <typename T>
-inline TfLiteStatus
-CustomSum(TfLiteContext *context, T *input_data, const int *input_dims, const int input_num_dims,
- T *output_data, const int *output_dims, const int output_num_dims, const int *axis,
- const int num_axis_dimensions, bool keep_dims, int *temp_index, int *resolved_axis)
-{
- // resolves axis.
- int num_resolved_axis = 0;
- for (int idx = 0; idx < num_axis_dimensions; ++idx)
- {
- int current = axis[idx];
- TF_LITE_ENSURE(context, (current < input_num_dims && current + input_num_dims >= 0));
- if (current < 0)
- {
- current += input_num_dims;
- }
- bool is_dup = false;
- for (int j = 0; j < num_resolved_axis; ++j)
- {
- if (resolved_axis[j] == current)
- {
- is_dup = true;
- break;
- }
- }
- if (!is_dup)
- {
- resolved_axis[num_resolved_axis++] = current;
- }
- }
-
- TF_LITE_ENSURE(context, (input_num_dims > 0));
- TF_LITE_ENSURE(context, (input_dims != nullptr));
- TF_LITE_ENSURE(context, (temp_index != nullptr));
-
- // resets output data.
- for (int idx = 0; idx < output_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, output_num_dims, output_dims, temp_index))
- {
- size_t output_offset =
- ReducedOutputOffset(output_num_dims, output_dims, temp_index, 0, nullptr);
- output_data[output_offset] = 0;
- }
-
- // resets temp index.
- for (int idx = 0; idx < input_num_dims; ++idx)
- {
- temp_index[idx] = 0;
- }
-
- // iterates through input_data.
- for (bool has_next = true; has_next;
- has_next = NextIndex(context, input_num_dims, input_dims, temp_index))
- {
- size_t input_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index, 0, nullptr);
- size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, temp_index,
- num_resolved_axis, resolved_axis);
- output_data[output_offset] += input_data[input_offset];
- }
-
- return kTfLiteOk;
-}
-
-TfLiteStatus EvalTensorFlowSum(TfLiteContext *context, TfLiteNode *node)
-{
-
- TensorFlowSumOp op_context(context, node);
- int num_axis = static_cast<int>(::tflite::NumElements(op_context.axis));
- TfLiteTensor *temp_index = &context->tensors[node->temporaries->data[0]];
- TfLiteTensor *resolved_axis = &context->tensors[node->temporaries->data[1]];
- // Resize the output tensor if the output tensor is dynamic.
- if (::tflite::IsDynamicTensor(op_context.output))
- {
- TF_LITE_ENSURE_OK(context, ResizeTempAxis(context, &op_context, resolved_axis));
- TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context));
- }
-
- TfLiteStatus returnStatus = kTfLiteOk;
- switch (op_context.input->type)
- {
- case kTfLiteFloat32:
- returnStatus = CustomSum<float>(
- context, op_context.input->data.f, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.f, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt32:
- returnStatus = CustomSum<int>(context, op_context.input->data.i32,
- op_context.input->dims->data, op_context.input->dims->size,
- op_context.output->data.i32, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteUInt8:
- returnStatus = CustomSum<uint8_t>(
- context, op_context.input->data.uint8, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.uint8,
- op_context.output->dims->data, op_context.output->dims->size, op_context.axis->data.i32,
- num_axis, false, temp_index->data.i32, resolved_axis->data.i32);
- break;
- case kTfLiteInt64:
- returnStatus = CustomSum<int64_t>(
- context, op_context.input->data.i64, op_context.input->dims->data,
- op_context.input->dims->size, op_context.output->data.i64, op_context.output->dims->data,
- op_context.output->dims->size, op_context.axis->data.i32, num_axis, false,
- temp_index->data.i32, resolved_axis->data.i32);
- break;
- default:
- returnStatus = kTfLiteError;
- }
-
- return returnStatus;
-}
-
-} // namespace TensorFlowSum
-} // namespace custom
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/ext/kernels/register.cpp b/libs/tflite/src/ext/kernels/register.cpp
deleted file mode 100644
index b822bd616..000000000
--- a/libs/tflite/src/ext/kernels/register.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This code is derived from the following file (in TensorFlow)
-// 'externals/tensorflow/tensorflow/contrib/lite/kernels/register.cc'
-#include "tflite/ext/kernels/register.h"
-#include "tflite/ext/kernels/CustomOps.h"
-
-namespace tflite {
-namespace ops {
-namespace builtin {
-
-TfLiteRegistration *Register_RELU();
-TfLiteRegistration *Register_RELU_N1_TO_1();
-TfLiteRegistration *Register_RELU6();
-TfLiteRegistration *Register_TANH();
-TfLiteRegistration *Register_LOGISTIC();
-TfLiteRegistration *Register_AVERAGE_POOL_2D();
-TfLiteRegistration *Register_MAX_POOL_2D();
-TfLiteRegistration *Register_L2_POOL_2D();
-TfLiteRegistration *Register_CONV_2D();
-TfLiteRegistration *Register_DEPTHWISE_CONV_2D();
-TfLiteRegistration *Register_SVDF();
-TfLiteRegistration *Register_RNN();
-TfLiteRegistration *Register_BIDIRECTIONAL_SEQUENCE_RNN();
-TfLiteRegistration *Register_UNIDIRECTIONAL_SEQUENCE_RNN();
-TfLiteRegistration *Register_EMBEDDING_LOOKUP();
-TfLiteRegistration *Register_EMBEDDING_LOOKUP_SPARSE();
-TfLiteRegistration *Register_FULLY_CONNECTED();
-TfLiteRegistration *Register_LSH_PROJECTION();
-TfLiteRegistration *Register_HASHTABLE_LOOKUP();
-TfLiteRegistration *Register_SOFTMAX();
-TfLiteRegistration *Register_CONCATENATION();
-TfLiteRegistration *Register_ADD();
-TfLiteRegistration *Register_SPACE_TO_BATCH_ND();
-TfLiteRegistration *Register_DIV();
-TfLiteRegistration *Register_SUB();
-TfLiteRegistration *Register_BATCH_TO_SPACE_ND();
-TfLiteRegistration *Register_MUL();
-TfLiteRegistration *Register_L2_NORMALIZATION();
-TfLiteRegistration *Register_LOCAL_RESPONSE_NORMALIZATION();
-TfLiteRegistration *Register_LSTM();
-TfLiteRegistration *Register_BIDIRECTIONAL_SEQUENCE_LSTM();
-TfLiteRegistration *Register_UNIDIRECTIONAL_SEQUENCE_LSTM();
-TfLiteRegistration *Register_PAD();
-TfLiteRegistration *Register_PADV2();
-TfLiteRegistration *Register_RESHAPE();
-TfLiteRegistration *Register_RESIZE_BILINEAR();
-TfLiteRegistration *Register_SKIP_GRAM();
-TfLiteRegistration *Register_SPACE_TO_DEPTH();
-TfLiteRegistration *Register_GATHER();
-TfLiteRegistration *Register_TRANSPOSE();
-TfLiteRegistration *Register_MEAN();
-TfLiteRegistration *Register_SPLIT();
-TfLiteRegistration *Register_SQUEEZE();
-TfLiteRegistration *Register_STRIDED_SLICE();
-TfLiteRegistration *Register_EXP();
-TfLiteRegistration *Register_TOPK_V2();
-TfLiteRegistration *Register_LOG_SOFTMAX();
-TfLiteRegistration *Register_CAST();
-TfLiteRegistration *Register_DEQUANTIZE();
-TfLiteRegistration *Register_PRELU();
-TfLiteRegistration *Register_MAXIMUM();
-TfLiteRegistration *Register_MINIMUM();
-TfLiteRegistration *Register_ARG_MAX();
-TfLiteRegistration *Register_GREATER();
-TfLiteRegistration *Register_GREATER_EQUAL();
-TfLiteRegistration *Register_LESS();
-TfLiteRegistration *Register_LESS_EQUAL();
-TfLiteRegistration *Register_FLOOR();
-TfLiteRegistration *Register_NEG();
-TfLiteRegistration *Register_SELECT();
-TfLiteRegistration *Register_SLICE();
-TfLiteRegistration *Register_SIN();
-TfLiteRegistration *Register_TRANSPOSE_CONV();
-TfLiteRegistration *Register_SPARSE_TO_DENSE();
-#ifndef OBS_BUILD
-TfLiteRegistration *Register_SUM();
-TfLiteRegistration *Register_REDUCE_MAX();
-TfLiteRegistration *Register_REDUCE_MIN();
-TfLiteRegistration *Register_EQUAL();
-TfLiteRegistration *Register_NOT_EQUAL();
-TfLiteRegistration *Register_SQRT();
-TfLiteRegistration *Register_RSQRT();
-TfLiteRegistration *Register_SHAPE();
-TfLiteRegistration *Register_POW();
-TfLiteRegistration *Register_FAKE_QUANT();
-TfLiteRegistration *Register_PACK();
-TfLiteRegistration *Register_ONE_HOT();
-TfLiteRegistration *Register_LOGICAL_OR();
-TfLiteRegistration *Register_LOGICAL_AND();
-TfLiteRegistration *Register_LOGICAL_NOT();
-TfLiteRegistration *Register_UNPACK();
-TfLiteRegistration *Register_FLOOR_DIV();
-TfLiteRegistration *Register_SQUARE();
-TfLiteRegistration *Register_ZEROS_LIKE();
-#endif // OBS_BUILD
-
-} // namespace builtin
-} // namespace ops
-} // namespace tflite
-
-namespace nnfw {
-namespace tflite {
-
-BuiltinOpResolver::BuiltinOpResolver()
-{
- /* Registers the operators this resolver can resolve: one AddBuiltin() call
-  * per builtin operator code, followed by four AddCustom() calls that register
-  * custom operators by name. Registrations between #ifndef OBS_BUILD and its
-  * #endif are compiled only when OBS_BUILD is not defined.
-  *
-  * Using namespace directive to minimize diff with upstream tensorflow */
- using namespace ::tflite::ops::builtin;
- using namespace ::tflite;
-
- AddBuiltin(BuiltinOperator_RELU, Register_RELU());
- AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1());
- AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
- AddBuiltin(BuiltinOperator_TANH, Register_TANH());
- AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC());
- AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D());
- AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D());
- AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_2D());
- AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D());
- AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D());
- AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
- AddBuiltin(BuiltinOperator_RNN, Register_RNN());
- AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, Register_BIDIRECTIONAL_SEQUENCE_RNN());
- AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, Register_UNIDIRECTIONAL_SEQUENCE_RNN());
- AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP());
- AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, Register_EMBEDDING_LOOKUP_SPARSE());
- AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED());
- AddBuiltin(BuiltinOperator_LSH_PROJECTION, Register_LSH_PROJECTION());
- AddBuiltin(BuiltinOperator_HASHTABLE_LOOKUP, Register_HASHTABLE_LOOKUP());
- AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX());
- AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION());
- AddBuiltin(BuiltinOperator_ADD, Register_ADD());
- AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND());
- AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND());
- AddBuiltin(BuiltinOperator_MUL, Register_MUL());
- AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
- AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, Register_LOCAL_RESPONSE_NORMALIZATION());
- AddBuiltin(BuiltinOperator_LSTM, Register_LSTM());
- AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, Register_BIDIRECTIONAL_SEQUENCE_LSTM());
- AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, Register_UNIDIRECTIONAL_SEQUENCE_LSTM());
- AddBuiltin(BuiltinOperator_PAD, Register_PAD());
- AddBuiltin(BuiltinOperator_PADV2, Register_PADV2());
- AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
- AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR());
- AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
- AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH());
- AddBuiltin(BuiltinOperator_GATHER, Register_GATHER());
- AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE());
- AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
- AddBuiltin(BuiltinOperator_DIV, Register_DIV());
- AddBuiltin(BuiltinOperator_SUB, Register_SUB());
- AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT());
- AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE());
- AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
- AddBuiltin(BuiltinOperator_EXP, Register_EXP());
- AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2());
- AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX());
- AddBuiltin(BuiltinOperator_CAST, Register_CAST());
- AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE());
- AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
- AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
- AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
- AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
- AddBuiltin(BuiltinOperator_GREATER, Register_GREATER());
- AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL());
- AddBuiltin(BuiltinOperator_LESS, Register_LESS());
- AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL());
- AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
- AddBuiltin(BuiltinOperator_NEG, Register_NEG());
- AddBuiltin(BuiltinOperator_SELECT, Register_SELECT());
- AddBuiltin(BuiltinOperator_SLICE, Register_SLICE());
- AddBuiltin(BuiltinOperator_SIN, Register_SIN());
-#ifndef OBS_BUILD
- AddBuiltin(BuiltinOperator_SUM, Register_SUM());
- AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX());
- AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN());
- AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, Register_TRANSPOSE_CONV());
- AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE());
- AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL());
- AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL());
- AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
- AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
- AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE());
- AddBuiltin(BuiltinOperator_POW, Register_POW());
- AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2);  // NOTE(review): the extra 1, 2 are presumably min/max op versions; confirm against AddBuiltin's declaration
- AddBuiltin(BuiltinOperator_PACK, Register_PACK());
- AddBuiltin(BuiltinOperator_ONE_HOT, Register_ONE_HOT());
- AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
- AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
- AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
- AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK());
- AddBuiltin(BuiltinOperator_FLOOR_DIV, Register_FLOOR_DIV());
- AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
- AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE());
-#endif // OBS_BUILD
-
- AddCustom("TensorFlowMax", nnfw::tflite::custom::Register_TensorFlowMax());
- AddCustom("SquaredDifference", nnfw::tflite::custom::Register_SquaredDifference());
- AddCustom("TensorFlowSum", nnfw::tflite::custom::Register_TensorFlowSum());
- AddCustom("Abs", nnfw::tflite::custom::Register_Abs());
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/ext/nnapi_delegate.cpp b/libs/tflite/src/ext/nnapi_delegate.cpp
deleted file mode 100644
index 25858a7b4..000000000
--- a/libs/tflite/src/ext/nnapi_delegate.cpp
+++ /dev/null
@@ -1,1209 +0,0 @@
-/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// NOTE To minimize diff with upstream tensorflow, disable clang-format
-// clang-format off
-
-// NOTE This code is derived from the following file (in TensorFlow v1.12)
-// 'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.cc'
-#include "tflite/ext/nnapi_delegate.h"
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#ifdef OBS_BUILD
-#include "tensorflow/contrib/lite/builtin_op_data.h"
-#include "tensorflow/contrib/lite/error_reporter.h"
-#else
-#include "tensorflow/contrib/lite/c/builtin_op_data.h"
-#include "tensorflow/contrib/lite/core/api/error_reporter.h"
-#endif
-#include "tensorflow/contrib/lite/model.h"
-#include "NeuralNetworksShim.h"
-#include "NeuralNetworksExShim.h"
-
-#ifdef __ANDROID__
-#include <android/log.h>
-#include <sys/system_properties.h>
-#endif
-
-namespace nnfw {
-namespace tflite {
-
-void logError(const char* format, ...) {
- // stderr is convenient for native tests, but is not captured for apps
- va_list args_for_stderr;
- va_start(args_for_stderr, format);
- vfprintf(stderr, format, args_for_stderr);
- va_end(args_for_stderr);
- fprintf(stderr, "\n");
- fflush(stderr);
-#ifdef __ANDROID__
- // produce logcat output for general consumption
- va_list args_for_log;
- va_start(args_for_log, format);
- __android_log_vprint(ANDROID_LOG_ERROR, "tflite", format, args_for_log);
- va_end(args_for_log);
-#endif
-}
-
-// Logs a printf-style message through logError() and terminates the process
-// with exit status 1. Both statements live in one braced block so the macro
-// stays a single statement when it is the body of an unbraced `if`; without
-// the braces only logError() would be guarded and exit(1) would always run.
-#define FATAL(...)         \
-  {                        \
-    logError(__VA_ARGS__); \
-    exit(1);               \
-  }
-
-// NOTE: the four checking macros below deliberately keep the plain
-// `if (...) { ... }` shape (rather than do { } while (0)) because existing
-// call sites invoke them without a trailing semicolon. The argument is
-// parenthesized so that any expression can be passed safely.
-
-// TODO(aselle): Change the error model to use status codes.
-// Terminates the process via FATAL unless `x` evaluates to kTfLiteOk.
-#define CHECK_TFLITE_SUCCESS(x)                                           \
-  if ((x) != kTfLiteOk) {                                                 \
-    FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \
-          __LINE__);                                                      \
-  }
-
-// Terminates the process via FATAL unless `x` evaluates to
-// ANEURALNETWORKS_NO_ERROR.
-#define CHECK_NN(x)                                                     \
-  if ((x) != ANEURALNETWORKS_NO_ERROR) {                                \
-    FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \
-          __LINE__);                                                    \
-  }
-
-// Logs an error and makes the enclosing function return kTfLiteError unless
-// `x` evaluates to kTfLiteOk.
-#define RETURN_ERROR_IF_TFLITE_FAILED(x)                                       \
-  if ((x) != kTfLiteOk) {                                                      \
-    logError(                                                                  \
-        "Returning error since TFLite returned failure nnapi_delegate.cc:%d.", \
-        __LINE__);                                                             \
-    return kTfLiteError;                                                       \
-  }
-
-// Logs an error and makes the enclosing function return kTfLiteError unless
-// `x` evaluates to ANEURALNETWORKS_NO_ERROR.
-#define RETURN_ERROR_IF_NN_FAILED(x)                                          \
-  if ((x) != ANEURALNETWORKS_NO_ERROR) {                                      \
-    logError(                                                                 \
-        "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \
-        __LINE__);                                                            \
-    return kTfLiteError;                                                      \
-  }
-
-// Tracking of NNAPI operand ids: sentinel values stored in the per-tensor id table in place of a real operand id.
-static const int64_t kOperandIdNotSet = -1;  // NOTE(review): presumably "no operand id assigned yet"; not used in the code visible here, confirm
-static const int64_t kOperandNotNeeded = -2;  // addTensorOperands() skips every tensor whose table entry equals this value
-
-namespace {
-
-// Returns the Android SDK level read from the "ro.build.version.sdk" system
-// property. A value containing any non-digit character yields 0xFFFF, and a
-// missing/empty property is fatal. On non-Android builds the result is 0.
-int32_t GetAndroidSdkVersion() {
-#ifdef __ANDROID__
-  const char* property_name = "ro.build.version.sdk";
-  char property_value[PROP_VALUE_MAX];
-  const int value_length = __system_property_get(property_name, property_value);
-  if (value_length == 0) {
-    FATAL("No %s prop", property_name);
-    return 0;
-  }
-  for (int pos = 0; pos < value_length; ++pos) {
-    const char ch = property_value[pos];
-    if (ch < '0' || ch > '9') {
-      // Non-numeric SDK version, assume it's higher than expected.
-      return 0xFFFF;
-    }
-  }
-  return atoi(property_value);
-#else
-  return 0;
-#endif  // __ANDROID__
-}
-
-// Returns the value of GetAndroidSdkVersion(), evaluating it only on the
-// first call and handing back the stored result on every later call.
-int32_t GetAndroidSdkVersionCached() {
-  static const int32_t cached_sdk_version = GetAndroidSdkVersion();
-  return cached_sdk_version;
-}
-
-static const uint32_t dimension_for_scalar[1] = {1};  // shape {1}: addTensorOperands() points a rank-0 tensor's operand dimensions here so it is registered as a one-element vector
-
-} // namespace
-
-// Maps `filename` through the MMAPAllocation base class and, when the mapping
-// did not fail, creates an NNAPI memory object over the mapped file
-// descriptor (read-only, starting at offset 0) and stores it in handle_.
-NNAPIAllocation::NNAPIAllocation(const char* filename,
-                                 ::tflite::ErrorReporter* error_reporter)
-    : MMAPAllocation(filename, error_reporter) {
-  const bool mapping_ok = (mmapped_buffer_ != MAP_FAILED);
-  if (mapping_ok) {
-    // CHECK_NN terminates the process if NNAPI reports an error here.
-    CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ,
-                                                mmap_fd_, 0, &handle_));
-  }
-}
-
-// Frees the NNAPI memory object held in handle_, if there is one.
-NNAPIAllocation::~NNAPIAllocation() {
-  if (!handle_) return;
-  ANeuralNetworksMemory_free(handle_);
-}
-
-// Frees the compiled model first and then the model itself, resetting each
-// pointer to null after it has been freed.
-NNAPIDelegate::~NNAPIDelegate() {
-  if (nn_compiled_model_ != nullptr) {
-    ANeuralNetworksCompilation_free(nn_compiled_model_);
-    nn_compiled_model_ = nullptr;
-  }
-  if (nn_model_ != nullptr) {
-    ANeuralNetworksModel_free(nn_model_);
-    nn_model_ = nullptr;
-    // TODO(aselle): Is this thread-safe and callable multiple times?
-  }
-  // ANeuralNetworksShutdown();
-}
-
-// Adds the tensors of the interpreter to the NN API model.
-// Adds the tensors of the interpreter to the NN API model.
-//
-// Walks the interpreter's tensors in index order and registers one NNAPI
-// operand for each tensor that is needed, handing out operand ids
-// sequentially starting at 0.
-//
-//   interpreter           source of the tensors.
-//   nn_model              NNAPI model that receives the operands.
-//   no_of_operands_added  out: number of operands added, i.e. the next free
-//                         operand id. Written only on success.
-//   nnapi_ids             in/out, indexed by tensor index: entries equal to
-//                         kOperandNotNeeded are skipped, every other entry is
-//                         overwritten with the operand id assigned to it.
-//
-// Returns kTfLiteOk on success, or kTfLiteError when a tensor has an
-// unsupported type or rank or when an NNAPI call fails.
-TfLiteStatus addTensorOperands(::tflite::Interpreter* interpreter,
-                               ANeuralNetworksModel* nn_model,
-                               uint32_t* no_of_operands_added,
-                               std::vector<int64_t>* nnapi_ids) {
-  uint32_t next_id = 0;
-  for (size_t i = 0; i < interpreter->tensors_size(); i++) {
-    // Skip temporaries and RNN back-edges.
-    if ((*nnapi_ids)[i] == kOperandNotNeeded) continue;
-
-    (*nnapi_ids)[i] = int64_t(next_id);
-
-    int32_t nn_type = 0;
-    // NNAPI requires 32-bit float scale to be zero, tflite doesn't care
-    float scale = 0.0f;
-    int32_t zeroPoint = 0;
-    TfLiteTensor* tensor = interpreter->tensor(i);
-    switch (tensor->type) {
-      case kTfLiteNoType:
-        // Tensors added during initialization of Ops don't have a type yet and
-        // should not be registered with the NNAPI.
-        // NOTE(review): the id written above is kept for this tensor while
-        // next_id is not advanced, so the next registered tensor receives the
-        // same id.
-        continue;
-      case kTfLiteFloat32:
-        nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
-        break;
-      case kTfLiteUInt8:
-        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
-        scale = tensor->params.scale;
-        // FIXME The next line is a workaround because currently a zero scale
-        // is passed down from TF Lite. Note that the latest NeuralNetworks.h
-        // (see
-        // https://android.googlesource.com/platform/frameworks/ml/+/master/nn/runtime/include/NeuralNetworks.h)
-        // requires scale to be greater than zero. Remove this workaround when
-        // the scale value is correctly passed.
-        scale = (scale == 0.0f) ? 1.0f : scale;
-        zeroPoint = tensor->params.zero_point;
-        break;
-      case kTfLiteInt32:
-        nn_type = ANEURALNETWORKS_TENSOR_INT32;
-        scale = tensor->params.scale;
-        zeroPoint = tensor->params.zero_point;
-        break;
-      case kTfLiteBool:
-        // Workaround to pass bool type under NNAPI: represent it as
-        // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with scale = 1.0f and
-        // zero_point = 0.
-        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
-        scale = 1.0f;
-        zeroPoint = 0;
-        break;
-      default:
-        logError("Unsupported tensor type %d", tensor->type);
-        return kTfLiteError;
-    }
-    if (tensor->dims->size == 0) {
-      // WORKAROUND Some models have rank-0 tensors. Only float32 and int32
-      // scalars are accepted; every other type is rejected here.
-      switch (tensor->type) {
-        case kTfLiteFloat32:
-          nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
-          break;
-        case kTfLiteInt32:
-          nn_type = ANEURALNETWORKS_TENSOR_INT32;
-          break;
-        default:
-          // `i` is a size_t, so it must be printed with %zu, not %d.
-          logError(
-              "NNAPI doesn't support tensors with rank 0 (index %zu name %s)",
-              i, tensor->name);
-          return kTfLiteError;
-      }
-    }
-    if (tensor->dims->size > 4) {
-      // `i` is a size_t, so it must be printed with %zu, not %d.
-      logError(
-          "NNAPI doesn't support tensors with rank > 4 (index %zu name %s)", i,
-          tensor->name);
-      return kTfLiteError;
-    }
-    // TODO(aselle): Note, many of these are intermediate results. Do I need
-    // to ever specify these sizes. I am currently below doing setValue
-    // on all of them, but I shouldn't in the future.
-    // Answer(jeanluc): If all the operators can set the dimension correctly,
-    // you won't need to.
-    ANeuralNetworksOperandType operand_type{
-        nn_type, static_cast<uint32_t>(tensor->dims->size),
-        reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
-    if (tensor->dims->size == 0) {
-      // WORKAROUND Some models have rank-0 tensors.
-      // Treat a scalar as a vector of size 1.
-      operand_type.dimensions = dimension_for_scalar;
-      operand_type.dimensionCount = 1;
-    }
-    RETURN_ERROR_IF_NN_FAILED(
-        ANeuralNetworksModel_addOperand(nn_model, &operand_type));
-    // TODO(aselle): Based on Michael's suggestion, limiting this to read
-    // only memory
-    if (tensor->allocation_type == kTfLiteMmapRo) {
-      // Constant data: reference it through the NNAPI memory object when the
-      // tensor is backed by an NNAPIAllocation, otherwise pass the raw bytes.
-      if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
-              static_cast<const ::tflite::Allocation*>(tensor->allocation))) {
-        RETURN_ERROR_IF_NN_FAILED(
-            ANeuralNetworksModel_setOperandValueFromMemory(
-                nn_model, next_id, alloc->memory(),
-                alloc->offset(tensor->data.raw), tensor->bytes));
-      } else {
-        RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
-            nn_model, next_id, tensor->data.raw, tensor->bytes));
-      }
-    } else if (tensor->bytes == 0) {
-      // These size 0 tensors are optional tensors reserved.
-      RETURN_ERROR_IF_NN_FAILED(
-          ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0));
-    }
-
-    ++next_id;
-  }
-  *no_of_operands_added = next_id;
-  return kTfLiteOk;
-}
-
-// Appends one entry to `into` for each of the `from_ids_count` tensor ids in
-// `from_ids_buf`: an id equal to kOptionalTensor is appended as-is, any other
-// id is replaced by `map[id]`. Values are converted to uint32_t on insertion.
-void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count,
-                        std::vector<uint32_t>* into,
-                        const std::vector<int64_t>& map) {
-  const int* const ids_end = from_ids_buf + from_ids_count;
-  for (const int* id_ptr = from_ids_buf; id_ptr != ids_end; ++id_ptr) {
-    const int tensor_id = *id_ptr;
-    if (tensor_id != kOptionalTensor) {
-      into->push_back(map[tensor_id]);
-      continue;
-    }
-    // Optional inputs keep their marker value; no lookup is performed.
-    into->push_back(tensor_id);
-  }
-}
-
-// Adds the operations and their parameters to the NN API model.
-// 'next-id' is the operand ID of the next operand of the model.
-TfLiteStatus AddOpsAndParams(
- ::tflite::Interpreter* interpreter, ANeuralNetworksModel* nn_model,
- uint32_t next_id, std::vector<int>* model_state_inputs,
- std::vector<int>* model_state_outputs,
- const std::vector<int64_t>& tensor_id_to_nnapi_id) {
- for (size_t i = 0; i < interpreter->nodes_size(); i++) {
- const auto* node_and_registration = interpreter->node_and_registration(i);
- const TfLiteNode& node = node_and_registration->first;
- const TfLiteRegistration& registration = node_and_registration->second;
- ::tflite::BuiltinOperator builtin =
- static_cast<::tflite::BuiltinOperator>(registration.builtin_code);
-
- // Add the parameters.
- std::vector<uint32_t> augmented_inputs, augmented_outputs;
- MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs,
- tensor_id_to_nnapi_id);
- MapAndAddTensorIds(node.outputs->data, node.outputs->size,
- &augmented_outputs, tensor_id_to_nnapi_id);
-
- auto add_scalar_int32 = [&nn_model, &augmented_inputs,
- &next_id](int value) {
- ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
- sizeof(int32_t)))
- augmented_inputs.push_back(next_id++);
- };
-
- auto add_scalar_float32 = [&nn_model, &augmented_inputs,
- &next_id](float value) {
- ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_FLOAT32};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
- sizeof(float)))
- augmented_inputs.push_back(next_id++);
- };
-
- auto add_vector_int32 = [&](const int* values, uint32_t num_values) {
- ANeuralNetworksOperandType operand_type{
- .type = ANEURALNETWORKS_TENSOR_INT32,
- .dimensionCount = 1,
- .dimensions = &num_values};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(
- nn_model, next_id, values, sizeof(int32_t) * num_values));
- augmented_inputs.push_back(next_id++);
- };
-
- // Handle state tensors of RNN, LSTM, SVDF.
- // For each state_out tensor, a corresponding state_in operand needs to be
- // created for NNAPI.
- auto duplicate_state_tensor_float32 =
- [interpreter, &nn_model, &next_id, &augmented_inputs,
- &model_state_inputs, &model_state_outputs](int tensor_id) {
- const TfLiteTensor* tensor = interpreter->tensor(tensor_id);
- ANeuralNetworksOperandType operand_type{
- ANEURALNETWORKS_TENSOR_FLOAT32,
- static_cast<uint32_t>(tensor->dims->size),
- reinterpret_cast<uint32_t*>(tensor->dims->data),
- tensor->params.scale, tensor->params.zero_point};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
- augmented_inputs.push_back(next_id);
- model_state_inputs->push_back(next_id);
- model_state_outputs->push_back(tensor_id);
- next_id++;
- };
- auto check_and_add_activation = [&add_scalar_int32](int activation) {
- if (activation > kTfLiteActRelu6) {
- logError("NNAPI only supports RELU, RELU1 and RELU6 activations");
- return kTfLiteError;
- }
- add_scalar_int32(activation);
- return kTfLiteOk;
- };
-
- auto add_add_params = [&add_scalar_int32](void* data) {
- auto* builtin = reinterpret_cast<TfLiteAddParams*>(data);
- if (builtin->activation > kTfLiteActRelu6) {
- logError("NNAPI only supports RELU, RELU1 and RELU6 activations");
- return kTfLiteError;
- }
- add_scalar_int32(builtin->activation);
- return kTfLiteOk;
- };
-
- auto add_pooling_params = [&add_scalar_int32,
- &check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- add_scalar_int32(builtin->filter_width);
- add_scalar_int32(builtin->filter_height);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_convolution_params = [&add_scalar_int32,
- &check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLiteConvParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_depthwise_conv_params = [&add_scalar_int32,
- &check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- add_scalar_int32(builtin->depth_multiplier);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_fully_connected_params = [&check_and_add_activation](void* data) {
- auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data);
- return check_and_add_activation(builtin->activation);
- };
-
- auto add_concatenation_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(data);
- add_scalar_int32(builtin->axis);
- if (builtin->activation != kTfLiteActNone) {
- logError("Concatenation does not support fused activation in NNAPI");
- return kTfLiteError;
- }
- return kTfLiteOk;
- };
-
- auto add_softmax_params = [&add_scalar_float32](void* data) {
- auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(data);
- add_scalar_float32(builtin->beta);
- };
-
- auto add_space_to_depth_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data);
- add_scalar_int32(builtin->block_size);
- };
-
- auto add_lstm_params = [&add_scalar_int32,
- &add_scalar_float32](void* data) {
- auto builtin = reinterpret_cast<TfLiteLSTMParams*>(data);
- add_scalar_int32(builtin->activation);
- add_scalar_float32(builtin->cell_clip);
- add_scalar_float32(builtin->proj_clip);
- };
-
- // LSTM in NNAPI requires scratch tensor as an output operand.
- auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model,
- &next_id, &augmented_outputs]() {
- if (node.temporaries->size == 0) return;
- int scratch_buffer_index = node.temporaries->data[0];
- const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index);
- ANeuralNetworksOperandType operand_type{
- ANEURALNETWORKS_TENSOR_FLOAT32,
- static_cast<uint32_t>(tensor->dims->size),
- reinterpret_cast<uint32_t*>(tensor->dims->data), tensor->params.scale,
- tensor->params.zero_point};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
- augmented_outputs.insert(augmented_outputs.begin(), next_id++);
- };
-
- auto add_mean_params = [&add_scalar_int32](void* data) {
-#ifdef OBS_BUILD
- auto builtin = reinterpret_cast<TfLiteMeanParams*>(data);
-#else
- auto builtin = reinterpret_cast<TfLiteReducerParams*>(data);
-#endif
- add_scalar_int32(builtin->keep_dims);
- };
-
- auto add_svdf_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteSVDFParams*>(data);
- add_scalar_int32(builtin->rank);
- add_scalar_int32(builtin->activation);
- };
-
- auto add_rnn_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteRNNParams*>(data);
- add_scalar_int32(builtin->activation);
- };
-
- auto add_squeeze_params = [&](void* data) {
- const auto* builtin = reinterpret_cast<TfLiteSqueezeParams*>(data);
- // Note that we add the squeeze dimensions even if the dimensions were
- // unspecified (empty), as NNAPI requires the operand.
- add_vector_int32(builtin->squeeze_dims,
- static_cast<uint32_t>(builtin->num_squeeze_dims));
- };
-
- // Handle optional input tensors.
- auto add_optional_tensors = [&nn_model, &augmented_inputs,
- &next_id](int nn_type) {
- for (size_t idx = 0; idx < augmented_inputs.size(); idx++) {
- if (augmented_inputs[idx] == kOptionalTensor) {
- const std::vector<uint32_t> dim = {0, 0};
- ANeuralNetworksOperandType operand_type{nn_type, 2, dim.data(), 0, 0};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
- CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id,
- nullptr, 0))
- augmented_inputs[idx] = next_id++;
- }
- }
- };
-
- int nnapi_version = 10;
-#include "nnapi_delegate_ex_AddOpsAndParams_lambda.inc"
-
- ANeuralNetworksOperationType nn_op_type;
-
- // Using namespace directive to minimize diff with upstream tensorflow
- namespace tflite = ::tflite;
-
- switch (builtin) {
- case tflite::BuiltinOperator_ADD:
- nn_op_type = ANEURALNETWORKS_ADD;
- RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data));
- break;
- case tflite::BuiltinOperator_MUL:
- nn_op_type = ANEURALNETWORKS_MUL;
- RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data));
- break;
- case tflite::BuiltinOperator_AVERAGE_POOL_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
- break;
- case tflite::BuiltinOperator_MAX_POOL_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
- break;
- case tflite::BuiltinOperator_L2_POOL_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
- break;
- case tflite::BuiltinOperator_CONV_2D: {
- auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data);
- if (builtin->dilation_width_factor != 1 ||
- builtin->dilation_height_factor != 1 || node.inputs->size != 3) {
- logError("NNAPI does not support dilated Conv2D.");
- return kTfLiteError;
- }
- }
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_convolution_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_CONV_2D;
- break;
- case tflite::BuiltinOperator_RELU:
- nn_op_type = ANEURALNETWORKS_RELU;
- break;
- case tflite::BuiltinOperator_RELU_N1_TO_1:
- nn_op_type = ANEURALNETWORKS_RELU1;
- break;
- case tflite::BuiltinOperator_RELU6:
- nn_op_type = ANEURALNETWORKS_RELU6;
- break;
- case tflite::BuiltinOperator_TANH:
- nn_op_type = ANEURALNETWORKS_TANH;
- break;
- case tflite::BuiltinOperator_FLOOR:
- nn_op_type = ANEURALNETWORKS_FLOOR;
- break;
- case tflite::BuiltinOperator_LOGISTIC:
- nn_op_type = ANEURALNETWORKS_LOGISTIC;
- break;
- case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_depthwise_conv_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
- break;
- case tflite::BuiltinOperator_CONCATENATION:
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_concatenation_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_CONCATENATION;
- break;
- case tflite::BuiltinOperator_SOFTMAX:
- add_softmax_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SOFTMAX;
- break;
- case tflite::BuiltinOperator_FULLY_CONNECTED:
- RETURN_ERROR_IF_TFLITE_FAILED(
- add_fully_connected_params(node.builtin_data));
- nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
- break;
- case tflite::BuiltinOperator_RESHAPE:
- if (node.inputs->size != 2) {
- logError("NNAPI only supports 2-input RESHAPE");
- return kTfLiteError;
- }
- nn_op_type = ANEURALNETWORKS_RESHAPE;
- // add_reshape_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_RESIZE_BILINEAR:
- add_resize_bilinear_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
- break;
- case tflite::BuiltinOperator_SPACE_TO_DEPTH:
- add_space_to_depth_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
- break;
- case tflite::BuiltinOperator_LSTM: {
- if (node.inputs->size + /* no of params */ 3 != 21) {
- logError("NNAPI only supports 21-input LSTMs");
- return kTfLiteError;
- }
- duplicate_state_tensor_float32(
- node.outputs->data[/*kOutputStateTensor*/ 0]);
- duplicate_state_tensor_float32(
- node.outputs->data[/*kCellStateTensor*/ 1]);
- add_lstm_params(node.builtin_data);
- add_lstm_scratch_tensor_float32();
- add_optional_tensors(ANEURALNETWORKS_TENSOR_FLOAT32);
- nn_op_type = ANEURALNETWORKS_LSTM;
- break;
- }
- case tflite::BuiltinOperator_DEQUANTIZE:
- nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
- break;
- case tflite::BuiltinOperator_SVDF: {
- duplicate_state_tensor_float32(node.outputs->data[/*kStateTensor*/ 0]);
- add_svdf_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SVDF;
- break;
- }
- case tflite::BuiltinOperator_RNN: {
- duplicate_state_tensor_float32(
- node.outputs->data[/*kHiddenStateTensor*/ 0]);
- add_rnn_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_RNN;
- break;
- }
- case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
- nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
- break;
- case tflite::BuiltinOperator_PAD:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_PAD;
- break;
- case tflite::BuiltinOperator_MEAN:
- nnapi_version = 11; // require NNAPI 1.1
- add_mean_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_MEAN;
- break;
- case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
- nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
- add_lrn_params(node.builtin_data);
- break;
- case tflite::BuiltinOperator_DIV:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_DIV;
- RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation(
- reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation));
- break;
- case tflite::BuiltinOperator_SUB:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_SUB;
- RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation(
- reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation));
- break;
- case tflite::BuiltinOperator_SQUEEZE:
- nnapi_version = 11; // requires NNAPI 1.1
- add_squeeze_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_SQUEEZE;
- break;
- case tflite::BuiltinOperator_TRANSPOSE:
- // The permutation input tensor value dictates the output dimensions.
- // TODO(b/110888333): Support dynamically-sized tensors in delegates.
- if ((node.inputs->size > 1) &&
- (interpreter->tensor(node.inputs->data[1])->allocation_type !=
- kTfLiteMmapRo)) {
- logError("NNAPI does not yet support dynamic tensors.");
- return kTfLiteError;
- }
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_TRANSPOSE;
- break;
- case tflite::BuiltinOperator_L2_NORMALIZATION:
- nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
- if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data)
- ->activation != kTfLiteActNone) {
- logError(
- "NNAPI does not support L2Normalization with fused activations");
- return kTfLiteError;
- }
- if ((node.inputs->size > 0) &&
- (interpreter->tensor(node.inputs->data[0])->dims->size != 4)) {
- logError("NNAPI only supports input rank 4 for L2Normalization");
- return kTfLiteError;
- }
- break;
- case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
- if (interpreter->tensor(node.outputs->data[0])->type !=
- kTfLiteFloat32) {
- logError("NNAPI only support HASHTABLE_LOOKUP with float32 output",
- builtin);
- return kTfLiteError;
- }
- nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
- break;
- case tflite::BuiltinOperator_STRIDED_SLICE:
- add_strided_slice_params(node.builtin_data);
- nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
- break;
- case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
- break;
- case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
- check_batch_to_space_params();
- break;
- case tflite::BuiltinOperator_CAST:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_CAST_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_TOPK_V2:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_TOPK_V2_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_GATHER:
- add_gather_ex_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_GATHER_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_SPLIT:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SPLIT_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_NEG:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_NEG_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_EXP:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_EXP_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_TRANSPOSE_CONV:
- add_transpose_conv_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_TRANSPOSE_CONV_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_PRELU:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_PRELU_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_ARG_MAX:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_ARGMAX_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
-#ifndef OBS_BUILD
- case tflite::BuiltinOperator_PACK:
- add_pack_ex_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_PACK_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_UNPACK:
- add_unpack_ex_params(node.builtin_data);
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_UNPACK_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_SQRT:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SQRT_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_RSQRT:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_RSQRT_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_EQUAL:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_EQUAL_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_NOT_EQUAL:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_NOT_EQUAL_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_SUM:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_REDUCE_SUM_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_REDUCE_MAX:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_TENSORFLOW_MAX_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_REDUCE_MIN:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_REDUCE_MIN_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_LOGICAL_AND:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_LOGICAL_AND_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- case tflite::BuiltinOperator_LOGICAL_OR:
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_LOGICAL_OR_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
-#endif
- case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
- case tflite::BuiltinOperator_LSH_PROJECTION:
- case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
- case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
- case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
- case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
- case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
- //case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
- case tflite::BuiltinOperator_PADV2:
- //case tflite::BuiltinOperator_RESIZE_BILINEAR:
- case tflite::BuiltinOperator_CALL:
- case tflite::BuiltinOperator_SKIP_GRAM:
- //case tflite::BuiltinOperator_RELU_N1_TO_1:
- //case tflite::BuiltinOperator_GATHER:
- //case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
- //case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
- //case tflite::BuiltinOperator_TOPK_V2:
- //case tflite::BuiltinOperator_SPLIT:
- //case tflite::BuiltinOperator_STRIDED_SLICE:
- //case tflite::BuiltinOperator_EXP:
- case tflite::BuiltinOperator_LOG_SOFTMAX:
- //case tflite::BuiltinOperator_DEQUANTIZE:
- case tflite::BuiltinOperator_DELEGATE:
- //case tflite::BuiltinOperator_CAST:
- //case tflite::BuiltinOperator_PRELU:
- case tflite::BuiltinOperator_MAXIMUM:
- case tflite::BuiltinOperator_MINIMUM:
-#ifndef OBS_BUILD
- case tflite::BuiltinOperator_ARG_MIN:
-#endif
- case tflite::BuiltinOperator_GREATER:
- case tflite::BuiltinOperator_GREATER_EQUAL:
- case tflite::BuiltinOperator_LESS:
- case tflite::BuiltinOperator_LESS_EQUAL:
- //case tflite::BuiltinOperator_NEG:
- case tflite::BuiltinOperator_SELECT:
- case tflite::BuiltinOperator_SLICE:
- case tflite::BuiltinOperator_SIN:
- //case tflite::BuiltinOperator_LOG:
- //case tflite::BuiltinOperator_TRANSPOSE_CONV:
-#ifndef OBS_BUILD
- case tflite::BuiltinOperator_TILE:
- case tflite::BuiltinOperator_EXPAND_DIMS:
- case tflite::BuiltinOperator_SPARSE_TO_DENSE:
- //case tflite::BuiltinOperator_EQUAL:
- //case tflite::BuiltinOperator_NOT_EQUAL:
- //case tflite::BuiltinOperator_SUM:
- //case tflite::BuiltinOperator_REDUCE_MAX:
- //case tflite::BuiltinOperator_REDUCE_MIN:
- case tflite::BuiltinOperator_REDUCE_PROD:
- //case tflite::BuiltinOperator_SQRT:
- //case tflite::BuiltinOperator_RSQRT:
- case tflite::BuiltinOperator_SHAPE:
- case tflite::BuiltinOperator_POW:
- case tflite::BuiltinOperator_FAKE_QUANT:
- //case tflite::BuiltinOperator_PACK:
- //case tflite::BuiltinOperator_LOGICAL_OR:
- case tflite::BuiltinOperator_ONE_HOT:
- //case tflite::BuiltinOperator_LOGICAL_AND:
- case tflite::BuiltinOperator_LOGICAL_NOT:
- //case tflite::BuiltinOperator_UNPACK:
- case tflite::BuiltinOperator_FLOOR_DIV:
- case tflite::BuiltinOperator_REDUCE_ANY:
- case tflite::BuiltinOperator_SQUARE:
- case tflite::BuiltinOperator_ZEROS_LIKE:
- case tflite::BuiltinOperator_FILL:
-#endif
- logError("Op code %d is currently not delegated to NNAPI", builtin);
- return kTfLiteError;
- break;
- case tflite::BuiltinOperator_CUSTOM: {
- std::string custom_name(registration.custom_name);
- if (custom_name.compare("TensorFlowMax") == 0) {
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_TENSORFLOW_MAX_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- }
- else if (custom_name.compare("SquaredDifference") == 0) {
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_SQUARED_DIFFERENCE_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- }
- else if (custom_name.compare("TensorFlowSum") == 0) {
- CHECK_NN(ANeuralNetworksModel_addOperationEx(
- nn_model, ANEURALNETWORKS_REDUCE_SUM_EX,
- static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(node.outputs->size),
- reinterpret_cast<uint32_t*>(node.outputs->data)));
- continue;
- }
- logError("Custom operations are not supported when using NNAPI.");
- return kTfLiteError;
- break;
- }
-#ifdef OBS_BUILD
- default:
- logError("Op code %d is currently not delegated to NNAPI", builtin);
- return kTfLiteError;
- break;
-#endif
- }
-
- //if (nnapi_version == 11 && GetAndroidSdkVersionCached() < 28) {
- // FATAL("Op %d needs NNAPI1.1", builtin);
- //}
-
- // Add the operation.
- RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation(
- nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
- augmented_inputs.data(),
- static_cast<uint32_t>(augmented_outputs.size()),
- reinterpret_cast<uint32_t*>(augmented_outputs.data())));
- }
- return kTfLiteOk;
-}
-
-TfLiteStatus NNAPIDelegate::BuildGraph(::tflite::Interpreter* interpreter) {
- if (nn_model_ && nn_compiled_model_) return model_status_;
-
- // TODO(aselle): This is not correct. need to handle resize invalidation.
- if (!nn_model_) {
- CHECK_NN(ANeuralNetworksModel_create(&nn_model_));
-
- // Find which tensors should be added to NNAPI. TFLite has temporaries
- // and RNN back-edges which are are not valid for NNAPI. We look through all
- // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with
- // kOperandIdNotSet. addTensorOperands will replace those with the
- // corresponding NNAPI operand ids and skip kOperandNotNeeded entries.
- std::vector<int64_t> tensor_id_to_nnapi_id(interpreter->tensors_size(),
- kOperandNotNeeded);
- auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf,
- size_t count) {
- for (int j = 0; j < count; j++) {
- auto tensor_id = buf[j];
- if (tensor_id != kOptionalTensor) {
- tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet;
- }
- }
- };
- for (size_t i = 0; i < interpreter->nodes_size(); i++) {
- const auto* node_and_registration = interpreter->node_and_registration(i);
- const TfLiteNode& node = node_and_registration->first;
- set_ids_to_not_set(node.inputs->data, node.inputs->size);
- set_ids_to_not_set(node.outputs->data, node.outputs->size);
- }
- set_ids_to_not_set(interpreter->inputs().data(),
- interpreter->inputs().size());
- set_ids_to_not_set(interpreter->outputs().data(),
- interpreter->outputs().size());
-
- uint32_t next_id = 0;
- RETURN_ERROR_IF_TFLITE_FAILED(addTensorOperands(
- interpreter, nn_model_, &next_id, &tensor_id_to_nnapi_id));
- RETURN_ERROR_IF_TFLITE_FAILED(
- AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_,
- &model_states_outputs_, tensor_id_to_nnapi_id));
-
- std::vector<uint32_t> augmented_inputs;
- MapAndAddTensorIds(interpreter->inputs().data(),
- interpreter->inputs().size(), &augmented_inputs,
- tensor_id_to_nnapi_id);
- augmented_inputs.insert(augmented_inputs.end(),
- model_states_inputs_.begin(),
- model_states_inputs_.end());
- std::vector<uint32_t> augmented_outputs;
- MapAndAddTensorIds(interpreter->outputs().data(),
- interpreter->outputs().size(), &augmented_outputs,
- tensor_id_to_nnapi_id);
- MapAndAddTensorIds(model_states_outputs_.data(),
- model_states_outputs_.size(), &augmented_outputs,
- tensor_id_to_nnapi_id);
-
- CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
- nn_model_, static_cast<uint32_t>(augmented_inputs.size()),
- reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
- static_cast<uint32_t>(augmented_outputs.size()),
- reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
-
- // TODO Support ANeuralNetworksModel_relaxComputationFloat32toFloat16
- //if (GetAndroidSdkVersionCached() >= 28) {
- // CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
- // nn_model_, interpreter->GetAllowFp16PrecisionForFp32()));
- //}
- CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
- }
- if (!nn_compiled_model_) {
- CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_));
- CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_));
- }
- return kTfLiteOk;
-}
-
-#include <unordered_map>
-
-TfLiteStatus NNAPIDelegate::Invoke(::tflite::Interpreter* interpreter) {
- if (!nn_model_) {
- model_status_ = BuildGraph(interpreter);
- if (model_status_ != kTfLiteOk) {
- logError("Failed to build graph for NNAPI");
- }
- }
- if (model_status_ != kTfLiteOk) {
- return model_status_;
- }
-
- ANeuralNetworksExecution* execution = nullptr;
- CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution));
-
- // Allocate temporary buffer to save casted boolean tensor
- std::unordered_map<size_t, uint8_t*> input_boolean_tensors;
- std::unordered_map<size_t, uint8_t*> output_boolean_tensors;
- for (size_t i = 0; i < interpreter->inputs().size(); i++)
- {
- int input = interpreter->inputs()[i];
- TfLiteTensor* tensor = interpreter->tensor(input);
- if (tensor->type == kTfLiteBool)
- {
- size_t elements = tensor->bytes / sizeof(bool);
- uint8_t* temp_tensor = new uint8_t[tensor->bytes / sizeof(bool)];
- input_boolean_tensors[i] = temp_tensor;
- for (size_t idx = 0; idx < elements; idx++)
- {
- temp_tensor[idx] = (tensor->data.b[idx] ? 0x00 : 0xff);
- }
- }
- }
- for (size_t i = 0; i < interpreter->outputs().size(); i++)
- {
- int output = interpreter->outputs()[i];
- TfLiteTensor* tensor = interpreter->tensor(output);
- if (tensor->type == kTfLiteBool)
- {
- uint8_t* temp_tensor = new uint8_t[tensor->bytes / sizeof(bool)];
- output_boolean_tensors[i] = temp_tensor;
- }
- }
-
- // Currently perform deep copy of input buffer
- for (size_t i = 0; i < interpreter->inputs().size(); i++) {
- int input = interpreter->inputs()[i];
- // TODO(aselle): Is this what we want or do we want input instead?
- // TODO(aselle): This should be called setInputValue maybe to be cons.
- TfLiteTensor* tensor = interpreter->tensor(input);
- if (tensor->type == kTfLiteBool)
- {
- CHECK_NN(ANeuralNetworksExecution_setInput(
- execution, i, nullptr, input_boolean_tensors[i], tensor->bytes * sizeof(uint8_t) / sizeof(bool)));
- }
- else
- {
- CHECK_NN(ANeuralNetworksExecution_setInput(
- execution, i, nullptr, tensor->data.raw, tensor->bytes));
- }
- }
-
- // Tell nn api where to place final data.
- for (size_t i = 0; i < interpreter->outputs().size(); i++) {
- int output = interpreter->outputs()[i];
- TfLiteTensor* tensor = interpreter->tensor(output);
-
- if (tensor->type == kTfLiteBool)
- {
- CHECK_NN(ANeuralNetworksExecution_setOutput(
- execution, i, nullptr, output_boolean_tensors[i], tensor->bytes * sizeof(uint8_t) / sizeof(bool)));
- }
- else
- {
- CHECK_NN(ANeuralNetworksExecution_setOutput(
- execution, i, nullptr, tensor->data.raw, tensor->bytes));
- }
- }
-
- // The state_out of previous invocation need to be mapped to state_in of
- // current invocation.
- for (size_t i = 0; i < model_states_outputs_.size(); i++) {
- int state_tensor_idx = model_states_outputs_[i];
- TfLiteTensor* tensor = interpreter->tensor(state_tensor_idx);
- // Here we are using a deep copy for state_in tensors so that we are not
- // reading and writing into the same buffer during a invocation.
- // TODO(miaowang): using double shared buffer to minimize the copies.
- CHECK_NN(ANeuralNetworksExecution_setInput(
- execution, i + interpreter->inputs().size(), nullptr, tensor->data.raw,
- tensor->bytes));
- // Tell NNAPI where to output the state_out.
- CHECK_NN(ANeuralNetworksExecution_setOutput(
- execution, i + interpreter->outputs().size(), nullptr, tensor->data.raw,
- tensor->bytes));
- }
-
- // Currently use blocking compute.
- ANeuralNetworksEvent* event = nullptr;
- CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event));
- CHECK_NN(ANeuralNetworksEvent_wait(event));
- ANeuralNetworksEvent_free(event);
- ANeuralNetworksExecution_free(execution);
-
- // Tell nn api where to place final data.
- for (size_t i = 0; i < interpreter->inputs().size(); i++) {
- int input = interpreter->inputs()[i];
- TfLiteTensor* tensor = interpreter->tensor(input);
-
- if (tensor->type == kTfLiteBool)
- {
- uint8_t* temp_tensor = input_boolean_tensors[i];
- input_boolean_tensors[i] = nullptr;
- delete temp_tensor;
- }
- }
- for (size_t i = 0; i < interpreter->outputs().size(); i++) {
- int output = interpreter->outputs()[i];
- TfLiteTensor* tensor = interpreter->tensor(output);
-
- if (tensor->type == kTfLiteBool)
- {
- uint8_t* temp_tensor = output_boolean_tensors[i];
- size_t elements = tensor->bytes / sizeof(bool);
- for (size_t idx = 0; idx < elements; idx++)
- {
- tensor->data.b[idx] = ((temp_tensor[idx] == 0x00) ? false : true);
- }
- output_boolean_tensors[i] = nullptr;
- delete temp_tensor;
- }
- }
-
-#if 0
- printf("From the NN API:\n");
- TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
- if (float* data =
- interpreter->typed_tensor<float>(interpreter->outputs()[0])) {
- size_t num = tensor->bytes / sizeof(float);
- for (float* p = data; p < data + num; p++) {
- printf(" %f", *p);
- }
- printf("\n");
- }
-#endif
-
- return kTfLiteOk;
-}
-
-bool NNAPIDelegate::IsSupported() { return nnfw::NNAPIExists(); }
-
-} // namespace tflite
-} // namespace nnfw
-
-// clang-format on
diff --git a/libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc b/libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
deleted file mode 100644
index a91e4de60..000000000
--- a/libs/tflite/src/ext/nnapi_delegate_ex_AddOpsAndParams_lambda.inc
+++ /dev/null
@@ -1,106 +0,0 @@
-// This file is included from AddOpsAndParams defined in nnapi_delegate.cc
-// and contains lambda for extened implementation to original Tensorflow Lite.
- auto add_resize_bilinear_params = [&add_scalar_int32, &interpreter, &augmented_inputs](void* data) {
- auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(data);
- if (builtin->align_corners) {
- FATAL("Resize bilinear does not support align corners in NNAPI");
- }
-
- TfLiteTensor* tensor = interpreter->tensor(augmented_inputs.back());
- assert(tensor->type == kTfLiteInt32);
- assert(tensor->bytes == sizeof(int)*2);
- augmented_inputs.pop_back();
-
- int height = ((int*)(tensor->data.raw))[1];
- int width = ((int*)(tensor->data.raw))[0];
- add_scalar_int32(height);
- add_scalar_int32(width);
- };
-
- auto check_l2normalization_params = [interpreter, &node](void* data) {
- auto builtin = reinterpret_cast<TfLiteL2NormParams*>(data);
- if (builtin->activation != kTfLiteActNone) {
- FATAL("NNAPI does not support L2Normalization with fused activations");
- }
- if ((node.inputs->size > 0) &&
- (interpreter->tensor(node.inputs->data[0])->dims->size != 4)) {
- FATAL("NNAPI only supports input rank 4 for L2Normalization");
- }
- };
-
- auto add_transpose_conv_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(data);
- add_scalar_int32(builtin->padding);
- add_scalar_int32(builtin->stride_width);
- add_scalar_int32(builtin->stride_height);
- };
-
- auto add_lrn_params = [&add_scalar_int32,
- &add_scalar_float32](void* data) {
- auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(data);
- add_scalar_int32(builtin->radius);
- add_scalar_float32(builtin->bias);
- add_scalar_float32(builtin->alpha);
- add_scalar_float32(builtin->beta);
- };
-
- auto add_strided_slice_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(data);
- add_scalar_int32(builtin->begin_mask);
- add_scalar_int32(builtin->end_mask);
- // ellipsis_mask and new_axis_mask are not supported on nn runtime
- // cf) tflite interpreter supports both operations
- if (builtin->ellipsis_mask) {
- FATAL("STRIDE_SLICE does not support ellipsis_mask in NNAPI");
- }
- if (builtin->new_axis_mask) {
- FATAL("STRIDE_SLICE does not support new_axis_mask in NNAPI");
- }
- add_scalar_int32(builtin->shrink_axis_mask);
- };
-
- auto add_gather_ex_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteGatherParams*>(data);
- add_scalar_int32(builtin->axis);
- if (builtin->axis != 0) {
- FATAL("GATHER does not support axis>0 in NNAPI");
- }
- };
-
-#ifndef OBS_BUILD
- auto add_pack_ex_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLitePackParams*>(data);
- add_scalar_int32(builtin->values_count);
- add_scalar_int32(builtin->axis);
- };
-
- auto add_unpack_ex_params = [&add_scalar_int32](void* data) {
- auto builtin = reinterpret_cast<TfLiteUnpackParams*>(data);
- add_scalar_int32(builtin->num);
- add_scalar_int32(builtin->axis);
- };
-#endif
-
- auto check_batch_to_space_params = [interpreter, &node, &augmented_inputs]() {
-
- //If there are 3 inputs, check if crops is having default values {0, 0, 0, 0}
- //Else unsupported by NNAPI
-
- if(augmented_inputs.size() == 3)
- {
- const uint32_t crops_buffer_index = node.inputs->data[2];
- const TfLiteTensor* crops = interpreter->tensor(crops_buffer_index);
- const int *crops_value = crops->data.i32;
-
- //Check if crops is having default values {0, 0, 0, 0}
- if(crops_value[0] != 0 || crops_value[1] != 0 || crops_value[2] != 0 || crops_value[3] != 0)
- {
- FATAL("BATCH_TO_SPACE_ND does not support Explicit crops in NNAPI");
- }
- else
- {
- //Restrict crops input and pass only other two inputs
- augmented_inputs.pop_back();
- }
- }
- };
diff --git a/libs/tflite/src/interp/FlatBufferBuilder.cpp b/libs/tflite/src/interp/FlatBufferBuilder.cpp
deleted file mode 100644
index 4b9cde719..000000000
--- a/libs/tflite/src/interp/FlatBufferBuilder.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FlatBufferBuilder.h"
-
-#include "tflite/ext/kernels/register.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FlatBufferBuilder::build(void) const
-{
- std::unique_ptr<::tflite::Interpreter> interpreter;
-
- nnfw::tflite::BuiltinOpResolver resolver;
-
- ::tflite::InterpreterBuilder builder(_model, resolver);
-
- builder(&interpreter);
-
- return std::move(interpreter);
-}
-
-} // namespace tflite
-} // namespace nnfw
diff --git a/libs/tflite/src/interp/FunctionBuilder.cpp b/libs/tflite/src/interp/FunctionBuilder.cpp
deleted file mode 100644
index eab940c18..000000000
--- a/libs/tflite/src/interp/FunctionBuilder.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tflite/interp/FunctionBuilder.h"
-
-namespace nnfw
-{
-namespace tflite
-{
-
-std::unique_ptr<::tflite::Interpreter> FunctionBuilder::build(void) const
-{
- auto res = std::unique_ptr<::tflite::Interpreter>{new ::tflite::Interpreter};
-
- _fn(*res);
-
- return std::move(res);
-}
-
-} // namespace tflite
-} // namespace nnfw